chromiumoxide/handler/
network.rs

1use super::blockers::{
2    block_websites::block_xhr,
3    ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
4    intercept_manager::NetworkInterceptManager,
5    scripts::{
6        URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE,
7        URL_IGNORE_TRIE_PATHS,
8    },
9    xhr::IGNORE_XHR_ASSETS,
10};
11use crate::auth::Credentials;
12use crate::cmd::CommandChain;
13use crate::handler::http::HttpRequest;
14use aho_corasick::AhoCorasick;
15use case_insensitive_string::CaseInsensitiveString;
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17    EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
18    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19    InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
20    SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23    fetch::{
24        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26        RequestPattern,
27    },
28    network::SetBypassServiceWorkerParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::{
31    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
32};
33use chromiumoxide_types::{Command, Method, MethodId};
34use hashbrown::{HashMap, HashSet};
35use lazy_static::lazy_static;
36use reqwest::header::PROXY_AUTHORIZATION;
37use std::collections::VecDeque;
38use std::time::Duration;
39
lazy_static! {
    /// Substrings and URL prefixes of scripts that stay allowed when JavaScript
    /// blocking is on: popular framework/bundle names plus a few verified
    /// third-party origins. Compiled into [`ALLOWED_MATCHER`].
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery",           // Covers jquery.min.js, jquery.js, etc.
        "angular",
        "react",            // Covers all React-related patterns
        "vue",              // Covers all Vue-related patterns
        "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app",              // Covers general app scripts like app.js
        "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "/wp-content/js/",  // Covers Wordpress content
        // Verified 3rd parties for request
        "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/api.js",
        "https://google.com/recaptcha/api.js",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", // webflow cdn scripts
        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
        "https://code.jquery.com/jquery-"
    ];

    /// Matcher over [`JS_FRAMEWORK_ALLOW`]; a script URL that matches any pattern is
    /// not blocked by the `block_javascript` check.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).unwrap();

    /// Path prefixes (relative to the host) used by JS framework build output.
    /// `.js` files under these prefixes are ignored by `ignore_script` when no
    /// site-specific intercept manager is active.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
            "_next/static/", "_astro/", "_app/immutable"
        }
    };

    /// Content types to ignore (documents, archives, images, video and audio).
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// Resource type names that are blocked when `ignore_visuals` is set.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names for auxiliary networking requests (reports, manifests,
    /// prefetches, pings, ...) that are always blocked by the request-paused handler.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Manifest",
        "Other",
        "Prefetch",
        "Ping",
    };

    /// Case-insensitive `css` extension used when matching stylesheet URLs.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// The default init command chain: only the network `EnableParams` command.
    /// Empty if the command fails to serialize.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// The init command chain used when HTTPS errors are ignored: the network
    /// `EnableParams` command followed by `SetIgnoreCertificateErrorsParams(true)`.
    /// A command that fails to serialize is left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// The fetch-domain enable command: handles auth requests and uses the `*`
    /// URL pattern.
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
        .handle_auth_requests(true)
        .pattern(RequestPattern::builder().url_pattern("*").build())
        .build()
    };
}
167
/// Tracks network state for a target and queues the CDP commands that need to be
/// sent in response to network/fetch events and configuration changes.
#[derive(Debug)]
pub struct NetworkManager {
    // Outgoing events, drained one at a time (oldest first) by `poll`.
    queued_events: VecDeque<NetworkEvent>,
    // Selects the init chain that also ignores certificate errors.
    ignore_httpserrors: bool,
    // Requests keyed by their request id.
    requests: HashMap<RequestId, HttpRequest>,
    // `requestWillBeSent` events waiting to be paired with a paused fetch request.
    // TODO put event in an Arc?
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    // Extra HTTP headers sent via `SetExtraHttpHeadersParams`.
    extra_headers: std::collections::HashMap<String, String>,
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    // True when the user asked for the cache to be disabled.
    user_cache_disabled: bool,
    attempted_authentications: HashSet<RequestId>,
    // Credentials supplied through `authenticate`.
    credentials: Option<Credentials>,
    // unused atm for remote connections, needs to be used for self launches.
    user_request_interception_enabled: bool,
    // NOTE(review): read by the interception/cache logic but not written by any
    // method visible in this part of the file — confirm where it is updated.
    protocol_request_interception_enabled: bool,
    offline: bool,
    // Timeout handed to the init `CommandChain`.
    request_timeout: Duration,
    // made_request: bool,
    /// Ignore visuals (no pings, prefetching, and etc).
    pub ignore_visuals: bool,
    /// Block CSS stylesheets.
    pub block_stylesheets: bool,
    /// Block javascript that is not critical to rendering.
    pub block_javascript: bool,
    /// Block analytics from rendering
    pub block_analytics: bool,
    /// Only load HTML; enables the embedded-script ignore check for scripts and documents.
    pub only_html: bool,
    /// The custom intercept handle logic to run on the website.
    pub intercept_manager: NetworkInterceptManager,
    /// Track the amount of times the document reloaded.
    pub document_reload_tracker: u8,
    /// The URL of the most recent document request (initially empty).
    pub document_target_domain: String,
}
203
204impl NetworkManager {
205    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
206        Self {
207            queued_events: Default::default(),
208            ignore_httpserrors,
209            requests: Default::default(),
210            requests_will_be_sent: Default::default(),
211            extra_headers: Default::default(),
212            request_id_to_interception_id: Default::default(),
213            user_cache_disabled: false,
214            attempted_authentications: Default::default(),
215            credentials: None,
216            user_request_interception_enabled: false,
217            protocol_request_interception_enabled: false,
218            offline: false,
219            request_timeout,
220            ignore_visuals: false,
221            block_javascript: false,
222            block_stylesheets: false,
223            block_analytics: true,
224            only_html: false,
225            intercept_manager: NetworkInterceptManager::Unknown,
226            document_reload_tracker: 0,
227            document_target_domain: String::new(),
228        }
229    }
230
231    pub fn init_commands(&self) -> CommandChain {
232        let cmds = if self.ignore_httpserrors {
233            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
234        } else {
235            INIT_CHAIN.clone()
236        };
237
238        CommandChain::new(cmds, self.request_timeout)
239    }
240
241    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
242        let method = cmd.identifier();
243        if let Ok(params) = serde_json::to_value(cmd) {
244            self.queued_events
245                .push_back(NetworkEvent::SendCdpRequest((method, params)));
246        }
247    }
248
    /// Removes and returns the oldest queued event, or `None` when the queue is empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
253
    /// Returns the extra HTTP headers currently stored on this manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
257
258    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
259        self.extra_headers = headers;
260        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
261        if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
262            self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
263        }
264    }
265
    /// Queues a `SetBypassServiceWorkerParams` command built from `bypass`.
    ///
    /// NOTE(review): the method name says "enabled" but the argument is forwarded
    /// unchanged as the bypass flag, so `true` presumably bypasses service workers —
    /// confirm that callers pass the value they intend.
    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
    }
269
    /// Records whether the user wants request interception and re-evaluates the
    /// protocol-level interception state.
    pub fn set_request_interception(&mut self, enabled: bool) {
        self.user_request_interception_enabled = enabled;
        self.update_protocol_request_interception();
    }
274
    /// Records the user's cache preference (stored inverted, as "disabled") and
    /// queues the matching cache command.
    pub fn set_cache_enabled(&mut self, enabled: bool) {
        self.user_cache_disabled = !enabled;
        self.update_protocol_cache_disabled();
    }
279
280    pub fn update_protocol_cache_disabled(&mut self) {
281        self.push_cdp_request(SetCacheDisabledParams::new(
282            self.user_cache_disabled || self.protocol_request_interception_enabled,
283        ));
284    }
285
    /// Stores `credentials` and re-evaluates the protocol-level interception state
    /// (having credentials counts as wanting interception).
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        self.update_protocol_request_interception()
    }
290
291    fn update_protocol_request_interception(&mut self) {
292        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
293
294        if enabled == self.protocol_request_interception_enabled {
295            return;
296        }
297
298        self.update_protocol_cache_disabled();
299
300        if enabled {
301            self.push_cdp_request(ENABLE_FETCH.clone())
302        } else {
303            self.push_cdp_request(DisableParams::default())
304        }
305    }
306
307    /// Url matches analytics that we want to ignore or trackers.
308    pub(crate) fn ignore_script(
309        &self,
310        url: &str,
311        block_analytics: bool,
312        intercept_manager: NetworkInterceptManager,
313    ) -> bool {
314        let mut ignore_script = block_analytics && URL_IGNORE_TRIE.contains_prefix(url);
315
316        if !ignore_script {
317            if let Some(index) = url.find("//") {
318                let pos = index + 2;
319
320                // Ensure there is something after `//`
321                if pos < url.len() {
322                    // Find the first slash after the `//`
323                    if let Some(slash_index) = url[pos..].find('/') {
324                        let base_path_index = pos + slash_index + 1;
325
326                        if url.len() > base_path_index {
327                            let new_url: &str = &url[base_path_index..];
328
329                            // ignore assets we do not need for frameworks
330                            if !ignore_script
331                                && intercept_manager == NetworkInterceptManager::Unknown
332                            {
333                                let hydration_file =
334                                    JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
335
336                                // ignore astro paths
337                                if hydration_file && new_url.ends_with(".js") {
338                                    ignore_script = true;
339                                }
340                            }
341
342                            if !ignore_script
343                                && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
344                            {
345                                ignore_script = true;
346                            }
347
348                            if !ignore_script
349                                && self.ignore_visuals
350                                && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
351                            {
352                                ignore_script = true;
353                            }
354                        }
355                    }
356                }
357            }
358        }
359
360        // fallback for file ending in analytics.js
361        if !ignore_script && block_analytics {
362            ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
363        }
364
365        ignore_script
366    }
367
368    /// Determine if the request should be skipped.
369    fn skip_xhr(
370        &self,
371        skip_networking: bool,
372        event: &EventRequestPaused,
373        network_event: bool,
374    ) -> bool {
375        // XHR check
376        if !skip_networking && network_event {
377            let request_url = event.request.url.as_str();
378
379            // check if part of ignore scripts.
380            let skip_analytics =
381                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
382
383            if skip_analytics {
384                true
385            } else if self.block_stylesheets || self.ignore_visuals {
386                let block_css = self.block_stylesheets;
387                let block_media = self.ignore_visuals;
388
389                let mut block_request = false;
390
391                if let Some(position) = request_url.rfind('.') {
392                    let hlen = request_url.len();
393                    let has_asset = hlen - position;
394
395                    if has_asset >= 3 {
396                        let next_position = position + 1;
397
398                        if block_media
399                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
400                                &request_url[next_position..].into(),
401                            )
402                        {
403                            block_request = true;
404                        } else if block_css {
405                            block_request =
406                                CaseInsensitiveString::from(request_url[next_position..].as_bytes())
407                                    .contains(&**CSS_EXTENSION)
408                        }
409                    }
410                }
411
412                if !block_request {
413                    block_request = ignore_script_xhr_media(request_url);
414                }
415
416                block_request
417            } else {
418                skip_networking
419            }
420        } else {
421            skip_networking
422        }
423    }
424
425    #[cfg(not(feature = "adblock"))]
426    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
427        use super::blockers::block_websites::block_website;
428
429        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
430            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
431        } else {
432            if let Some(network_id) = event.network_id.as_ref() {
433                if let Some(request_will_be_sent) =
434                    self.requests_will_be_sent.remove(network_id.as_ref())
435                {
436                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
437                } else {
438                    let current_url = event.request.url.as_str();
439                    let javascript_resource = event.resource_type == ResourceType::Script;
440                    let document_resource = event.resource_type == ResourceType::Document;
441                    let network_resource = !document_resource
442                        && (event.resource_type == ResourceType::Xhr
443                            || event.resource_type == ResourceType::Fetch
444                            || event.resource_type == ResourceType::WebSocket);
445
446                    let skip_networking =
447                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
448
449                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
450                    let mut replacer = None;
451
452                    if document_resource {
453                        if self.document_target_domain == current_url {
454                            // this will prevent the domain from looping (3 times is enough).
455                            self.document_reload_tracker += 1;
456                        } else if !self.document_target_domain.is_empty()
457                            && event.redirected_request_id.is_some()
458                        {
459                            let (http_document_replacement, mut https_document_replacement) =
460                                if self.document_target_domain.starts_with("http://") {
461                                    (
462                                        self.document_target_domain.replace("http://", "http//"),
463                                        self.document_target_domain.replace("http://", "https://"),
464                                    )
465                                } else {
466                                    (
467                                        self.document_target_domain.replace("https://", "https//"),
468                                        self.document_target_domain.replace("https://", "http://"),
469                                    )
470                                };
471
472                            let trailing = https_document_replacement.ends_with('/');
473
474                            if trailing {
475                                https_document_replacement.pop();
476                            }
477
478                            if https_document_replacement.ends_with('/') {
479                                https_document_replacement.pop();
480                            }
481
482                            let redirect_mask = format!(
483                                "{}{}",
484                                https_document_replacement, http_document_replacement
485                            );
486
487                            // handle redirect masking
488                            if current_url == redirect_mask {
489                                replacer = Some(if trailing {
490                                    format!("{}/", https_document_replacement)
491                                } else {
492                                    https_document_replacement
493                                });
494                            }
495                        }
496                        self.document_target_domain = event.request.url.clone();
497                    }
498
499                    let current_url = match &replacer {
500                        Some(r) => r,
501                        _ => &event.request.url,
502                    }
503                    .as_str();
504
505                    // main initial check
506                    let skip_networking = if !skip_networking {
507                        self.ignore_visuals
508                            && (IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()))
509                            || self.block_stylesheets
510                                && ResourceType::Stylesheet == event.resource_type
511                            || self.block_javascript
512                                && javascript_resource
513                                && self.intercept_manager == NetworkInterceptManager::Unknown
514                                && !ALLOWED_MATCHER.is_match(current_url)
515                    } else {
516                        skip_networking
517                    };
518
519                    let skip_networking = if !skip_networking
520                        && (self.only_html || self.ignore_visuals)
521                        && (javascript_resource || document_resource)
522                    {
523                        ignore_script_embedded(current_url)
524                    } else {
525                        skip_networking
526                    };
527
528                    // analytics check
529                    let skip_networking = if !skip_networking && javascript_resource {
530                        self.ignore_script(
531                            current_url,
532                            self.block_analytics,
533                            self.intercept_manager,
534                        )
535                    } else {
536                        skip_networking
537                    };
538
539                    // XHR check
540                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
541
542                    // custom interception layer.
543                    let skip_networking = if !skip_networking
544                        && (javascript_resource || network_resource || document_resource)
545                    {
546                        self.intercept_manager.intercept_detection(
547                            &current_url,
548                            self.ignore_visuals,
549                            network_resource,
550                        )
551                    } else {
552                        skip_networking
553                    };
554
555                    let skip_networking =
556                        if !skip_networking && (javascript_resource || network_resource) {
557                            block_website(&current_url)
558                        } else {
559                            skip_networking
560                        };
561
562                    if skip_networking {
563                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
564                        let fullfill_params =
565                            crate::handler::network::fetch::FulfillRequestParams::new(
566                                event.request_id.clone(),
567                                200,
568                            );
569                        self.push_cdp_request(fullfill_params);
570                    } else {
571                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
572                        let mut continue_params =
573                            ContinueRequestParams::new(event.request_id.clone());
574
575                        if replacer.is_some() {
576                            continue_params.url = Some(current_url.into());
577                            continue_params.intercept_response = Some(false);
578                        }
579
580                        self.push_cdp_request(continue_params)
581                    }
582                }
583            } else {
584                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
585            }
586        }
587    }
588
589    #[cfg(feature = "adblock")]
590    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
591        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
592            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
593        } else {
594            if let Some(network_id) = event.network_id.as_ref() {
595                if let Some(request_will_be_sent) =
596                    self.requests_will_be_sent.remove(network_id.as_ref())
597                {
598                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
599                } else {
600                    let current_url = event.request.url.as_str();
601                    let javascript_resource = event.resource_type == ResourceType::Script;
602                    let document_resource = event.resource_type == ResourceType::Document;
603                    let network_resource = !document_resource
604                        && (event.resource_type == ResourceType::Xhr
605                            || event.resource_type == ResourceType::Fetch
606                            || event.resource_type == ResourceType::WebSocket);
607
608                    // block all of these events.
609                    let skip_networking =
610                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
611
612                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
613
614                    if document_resource {
615                        if self.document_target_domain == current_url {
616                            // this will prevent the domain from looping (3 times is enough).
617                            self.document_reload_tracker += 1;
618                        } else if !self.document_target_domain.is_empty()
619                            && event.redirected_request_id.is_some()
620                        {
621                            let (http_document_replacement, mut https_document_replacement) =
622                                if self.document_target_domain.starts_with("http://") {
623                                    (
624                                        self.document_target_domain.replace("http://", "http//"),
625                                        self.document_target_domain.replace("http://", "https://"),
626                                    )
627                                } else {
628                                    (
629                                        self.document_target_domain.replace("https://", "https//"),
630                                        self.document_target_domain.replace("https://", "http://"),
631                                    )
632                                };
633
634                            let trailing = https_document_replacement.ends_with('/');
635
636                            if trailing {
637                                https_document_replacement.pop();
638                            }
639
640                            if https_document_replacement.ends_with('/') {
641                                https_document_replacement.pop();
642                            }
643
644                            let redirect_mask = format!(
645                                "{}{}",
646                                https_document_replacement, http_document_replacement
647                            );
648
649                            // handle redirect masking
650                            if current_url == redirect_mask {
651                                replacer = Some(if trailing {
652                                    format!("{}/", https_document_replacement)
653                                } else {
654                                    https_document_replacement
655                                });
656                            }
657                        }
658                        self.document_target_domain = event.request.url.clone();
659                    }
660
661                    let current_url = match &replacer {
662                        Some(r) => r,
663                        _ => &event.request.url,
664                    }
665                    .as_str();
666
667                    // main initial check
668                    let skip_networking = if !skip_networking {
669                        self.ignore_visuals
670                            && (IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()))
671                            || self.block_stylesheets
672                                && ResourceType::Stylesheet == event.resource_type
673                            || self.block_javascript
674                                && javascript_resource
675                                && self.intercept_manager == NetworkInterceptManager::Unknown
676                                && !ALLOWED_MATCHER.is_match(current_url)
677                    } else {
678                        skip_networking
679                    };
680
681                    let skip_networking = if !skip_networking {
682                        self.detect_ad(event)
683                    } else {
684                        skip_networking
685                    };
686
687                    let skip_networking = if !skip_networking
688                        && (self.only_html || self.ignore_visuals)
689                        && (javascript_resource || document_resource)
690                    {
691                        ignore_script_embedded(current_url)
692                    } else {
693                        skip_networking
694                    };
695
696                    // analytics check
697                    let skip_networking = if !skip_networking && javascript_resource {
698                        self.ignore_script(
699                            current_url,
700                            self.block_analytics,
701                            self.intercept_manager,
702                        )
703                    } else {
704                        skip_networking
705                    };
706
707                    // XHR check
708                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
709
710                    // custom interception layer.
711                    let skip_networking = if !skip_networking
712                        && (javascript_resource || network_resource || document_resource)
713                    {
714                        self.intercept_manager.intercept_detection(
715                            &event.request.url,
716                            self.ignore_visuals,
717                            network_resource,
718                        )
719                    } else {
720                        skip_networking
721                    };
722
723                    let skip_networking =
724                        if !skip_networking && (javascript_resource || network_resource) {
725                            block_website(&event.request.url)
726                        } else {
727                            skip_networking
728                        };
729
730                    if skip_networking {
731                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
732
733                        let fullfill_params =
734                            crate::handler::network::fetch::FulfillRequestParams::new(
735                                event.request_id.clone(),
736                                200,
737                            );
738                        self.push_cdp_request(fullfill_params);
739                    } else {
740                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
741
742                        let mut continue_params =
743                            ContinueRequestParams::new(event.request_id.clone());
744
745                        if replacer.is_some() {
746                            continue_params.url = Some(current_url.into());
747                            continue_params.intercept_response = Some(false);
748                        }
749                    }
750                }
751            } else {
752                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
753            }
754        }
755
756        // if self.only_html {
757        //     self.made_request = true;
758        // }
759    }
760
/// Decide whether a paused request matches the built-in adblock rules.
///
/// Only image, media, stylesheet, document, fetch and XHR requests are
/// checked; every other resource type returns `false` without consulting
/// the engine. The engine is built once, lazily, from `ADBLOCK_PATTERNS`.
///
/// Returns `true` when the engine reports a match for the request URL.
#[cfg(feature = "adblock")]
pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
    use adblock::{
        lists::{FilterSet, ParseOptions, RuleTypes},
        Engine,
    };

    lazy_static::lazy_static! {
        static ref AD_ENGINE: Engine = {
            let mut parse_options = ParseOptions::default();
            parse_options.rule_types = RuleTypes::All;

            let mut filters = FilterSet::new(false);
            filters.add_filters(
                &*crate::handler::blockers::adblock_patterns::ADBLOCK_PATTERNS,
                parse_options,
            );

            Engine::from_filter_set(filters, true)
        };
    };

    let blockable = matches!(
        event.resource_type,
        ResourceType::Image
            | ResourceType::Media
            | ResourceType::Stylesheet
            | ResourceType::Document
            | ResourceType::Fetch
            | ResourceType::Xhr
    );

    if !blockable {
        return false;
    }

    let request_type = event.resource_type.as_ref().to_lowercase();
    // The third-party flag comes from the event's `is_same_site`; a missing
    // value is treated as "not same site".
    let third_party = !event.request.is_same_site.unwrap_or_default();

    // Both hostnames are the placeholder "example.com"; the third-party
    // decision is carried by the explicit flag instead.
    let request = adblock::request::Request::preparsed(
        &event.request.url,
        "example.com",
        "example.com",
        &request_type,
        third_party,
    );

    AD_ENGINE.check_network_request(&request).matched
}
808
809    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
810        let response = if self
811            .attempted_authentications
812            .contains(event.request_id.as_ref())
813        {
814            AuthChallengeResponseResponse::CancelAuth
815        } else if self.credentials.is_some() {
816            self.attempted_authentications
817                .insert(event.request_id.clone().into());
818            AuthChallengeResponseResponse::ProvideCredentials
819        } else {
820            AuthChallengeResponseResponse::Default
821        };
822
823        let mut auth = AuthChallengeResponse::new(response);
824        if let Some(creds) = self.credentials.clone() {
825            auth.username = Some(creds.username);
826            auth.password = Some(creds.password);
827        }
828        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
829    }
830
831    pub fn set_offline_mode(&mut self, value: bool) {
832        if self.offline == value {
833            return;
834        }
835        self.offline = value;
836        if let Ok(network) = EmulateNetworkConditionsParams::builder()
837            .offline(self.offline)
838            .latency(0)
839            .download_throughput(-1.)
840            .upload_throughput(-1.)
841            .build()
842        {
843            self.push_cdp_request(network);
844        }
845    }
846
847    /// Request interception doesn't happen for data URLs with Network Service.
848    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
849        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
850            if let Some(interception_id) = self
851                .request_id_to_interception_id
852                .remove(event.request_id.as_ref())
853            {
854                self.on_request(event, Some(interception_id));
855            } else {
856                // TODO remove the clone for event
857                self.requests_will_be_sent
858                    .insert(event.request_id.clone(), event.clone());
859            }
860        } else {
861            self.on_request(event, None);
862        }
863    }
864
865    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
866        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
867            request.from_memory_cache = true;
868        }
869    }
870
871    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
872        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
873            request.set_response(event.response.clone());
874            self.queued_events
875                .push_back(NetworkEvent::RequestFinished(request))
876        }
877    }
878
879    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
880        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
881            if let Some(interception_id) = request.interception_id.as_ref() {
882                self.attempted_authentications
883                    .remove(interception_id.as_ref());
884            }
885            self.queued_events
886                .push_back(NetworkEvent::RequestFinished(request));
887        }
888    }
889
890    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
891        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
892            request.failure_text = Some(event.error_text.clone());
893            if let Some(interception_id) = request.interception_id.as_ref() {
894                self.attempted_authentications
895                    .remove(interception_id.as_ref());
896            }
897            self.queued_events
898                .push_back(NetworkEvent::RequestFailed(request));
899        }
900    }
901
902    fn on_request(
903        &mut self,
904        event: &EventRequestWillBeSent,
905        interception_id: Option<InterceptionId>,
906    ) {
907        let mut redirect_chain = Vec::new();
908        let mut redirect_location = None;
909
910        if let Some(redirect_resp) = &event.redirect_response {
911            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
912                if redirect_resp.status >= 300 && redirect_resp.status <= 399 {
913                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
914                        let fixed_location = location.replace(&redirect_resp.url, "");
915                        request.response.as_mut().map(|resp| {
916                            resp.headers.0["Location"] =
917                                serde_json::Value::String(fixed_location.clone());
918                        });
919                        redirect_location = Some(fixed_location);
920                    }
921                }
922
923                self.handle_request_redirect(
924                    &mut request,
925                    if let Some(redirect_location) = redirect_location {
926                        let mut redirect_resp = redirect_resp.clone();
927                        redirect_resp.headers.0["Location"] =
928                            serde_json::Value::String(redirect_location);
929                        redirect_resp
930                    } else {
931                        redirect_resp.clone()
932                    },
933                );
934
935                redirect_chain = std::mem::take(&mut request.redirect_chain);
936                redirect_chain.push(request);
937            }
938        }
939
940        let request = HttpRequest::new(
941            event.request_id.clone(),
942            event.frame_id.clone(),
943            interception_id,
944            self.user_request_interception_enabled,
945            redirect_chain,
946        );
947
948        self.requests.insert(event.request_id.clone(), request);
949        self.queued_events
950            .push_back(NetworkEvent::Request(event.request_id.clone()));
951    }
952
953    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
954        request.set_response(response);
955        if let Some(interception_id) = request.interception_id.as_ref() {
956            self.attempted_authentications
957                .remove(interception_id.as_ref());
958        }
959    }
960}
961
962#[derive(Debug)]
963pub enum NetworkEvent {
964    SendCdpRequest((MethodId, serde_json::Value)),
965    Request(RequestId),
966    Response(RequestId),
967    RequestFailed(HttpRequest),
968    RequestFinished(HttpRequest),
969}