chromiumoxide/handler/
network.rs

1use super::blockers::{
2    block_websites::block_xhr,
3    ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
4    intercept_manager::NetworkInterceptManager,
5    scripts::{
6        URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE,
7        URL_IGNORE_TRIE_PATHS,
8    },
9    xhr::IGNORE_XHR_ASSETS,
10};
11use crate::auth::Credentials;
12use crate::cmd::CommandChain;
13use crate::handler::http::HttpRequest;
14use aho_corasick::AhoCorasick;
15use case_insensitive_string::CaseInsensitiveString;
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17    EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
18    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19    InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
20    SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23    fetch::{
24        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26        RequestPattern,
27    },
28    network::SetBypassServiceWorkerParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::{
31    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
32};
33use chromiumoxide_types::{Command, Method, MethodId};
34use hashbrown::{HashMap, HashSet};
35use lazy_static::lazy_static;
36use reqwest::header::PROXY_AUTHORIZATION;
37use std::collections::VecDeque;
38use std::time::Duration;
39
// Lazily-initialised lookup tables and pre-built CDP commands used by the network manager.
lazy_static! {
    /// Substrings and URL prefixes of scripts that stay allowed when JavaScript
    /// blocking is enabled (popular libraries, app bundles and a few verified
    /// third parties).
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery",           // Covers jquery.min.js, jquery.js, etc.
        "angular",
        "react",            // Covers all React-related patterns
        "vue",              // Covers all Vue-related patterns
        "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app",              // Covers general app scripts like app.js
        "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "/wp-content/js/",  // Covers Wordpress content
        // Verified 3rd parties for request
        "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/api.js",
        "https://google.com/recaptcha/api.js",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", // webflow cdn scripts
        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
        "https://code.jquery.com/jquery-"
    ];

    /// Multi-pattern matcher built from `JS_FRAMEWORK_ALLOW`; a script URL that
    /// matches any pattern is not blocked by the `block_javascript` rule.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).unwrap();

    /// Path prefixes (relative to the host) of framework-generated asset
    /// directories — presumably Next.js, Astro and SvelteKit build output.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
            "_next/static/", "_astro/", "_app/immutable"
        }
    };

    /// Content types to ignore (archives, images, audio/video and office documents).
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// Resource type names treated as visual content; requests of these types
    /// are blocked when `ignore_visuals` is set.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names for auxiliary networking (reports, manifests,
    /// prefetches, pings, ...); paused requests of these types are always blocked.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Manifest",
        "Other",
        "Prefetch",
        "Ping",
    };

    /// Case-insensitive `css` extension used when matching stylesheet URLs.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// The default init command chain: only the network `EnableParams` command.
    /// Empty if the command cannot be serialised.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// The init command chain that additionally tells the browser to ignore
    /// certificate errors. Commands that fail to serialise are left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// The fetch-domain enable command: intercepts every URL (`*`) and asks the
    /// browser to route auth challenges through the handler.
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
        .handle_auth_requests(true)
        .pattern(RequestPattern::builder().url_pattern("*").build())
        .build()
    };
}
167
/// Returns `true` when `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];

    REDIRECT_CODES.contains(&status)
}
172
#[derive(Debug)]
/// The base network manager.
///
/// Collects outgoing CDP commands in an internal queue (drained through
/// `poll`) and decides, per paused fetch request, whether the request is
/// continued or blocked according to the public blocking flags below.
pub struct NetworkManager {
    /// Events queued by `push_cdp_request`; `poll` hands them out front-first.
    queued_events: VecDeque<NetworkEvent>,
    /// When set, `init_commands` uses the chain that also ignores certificate errors.
    ignore_httpserrors: bool,
    /// Requests keyed by their request id.
    requests: HashMap<RequestId, HttpRequest>,
    // TODO put event in an Arc?
    /// `requestWillBeSent` events keyed by request id; `on_fetch_request_paused`
    /// removes the entry that matches a paused request's network id.
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    /// Extra headers last applied through `set_extra_headers`.
    extra_headers: std::collections::HashMap<String, String>,
    /// Maps a request id to the interception id associated with it.
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    /// `true` when the cache was disabled through `set_cache_enabled(false)`.
    user_cache_disabled: bool,
    /// Request ids recorded for authentication attempts (presumably to avoid
    /// answering the same challenge twice — the usage is outside this excerpt).
    attempted_authentications: HashSet<RequestId>,
    /// Credentials supplied through `authenticate`; their presence alone
    /// requests fetch interception.
    credentials: Option<Credentials>,
    // unused atm for remote connections, needs to be used for self launches.
    user_request_interception_enabled: bool,
    /// Compared with the desired state in `update_protocol_request_interception`
    /// and forced to `true` by `disable_request_intercept`.
    protocol_request_interception_enabled: bool,
    /// Offline flag; initialised to `false` by `new`.
    offline: bool,
    /// Timeout passed to the `CommandChain` created by `init_commands`.
    request_timeout: Duration,
    // made_request: bool,
    /// Ignore visuals (no pings, prefetching, and etc).
    pub ignore_visuals: bool,
    /// Block CSS stylesheets.
    pub block_stylesheets: bool,
    /// Block javascript that is not critical to rendering.
    pub block_javascript: bool,
    /// Block analytics from rendering
    pub block_analytics: bool,
    /// Only html from loading.
    pub only_html: bool,
    /// The custom intercept handle logic to run on the website.
    pub intercept_manager: NetworkInterceptManager,
    /// Track the amount of times the document reloaded.
    pub document_reload_tracker: u8,
    /// The initial target domain.
    pub document_target_domain: String,
}
209
210impl NetworkManager {
211    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
212        Self {
213            queued_events: Default::default(),
214            ignore_httpserrors,
215            requests: Default::default(),
216            requests_will_be_sent: Default::default(),
217            extra_headers: Default::default(),
218            request_id_to_interception_id: Default::default(),
219            user_cache_disabled: false,
220            attempted_authentications: Default::default(),
221            credentials: None,
222            user_request_interception_enabled: false,
223            protocol_request_interception_enabled: false,
224            offline: false,
225            request_timeout,
226            ignore_visuals: false,
227            block_javascript: false,
228            block_stylesheets: false,
229            block_analytics: true,
230            only_html: false,
231            intercept_manager: NetworkInterceptManager::Unknown,
232            document_reload_tracker: 0,
233            document_target_domain: String::new(),
234        }
235    }
236
237    pub fn init_commands(&self) -> CommandChain {
238        let cmds = if self.ignore_httpserrors {
239            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
240        } else {
241            INIT_CHAIN.clone()
242        };
243
244        CommandChain::new(cmds, self.request_timeout)
245    }
246
247    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
248        let method = cmd.identifier();
249        if let Ok(params) = serde_json::to_value(cmd) {
250            self.queued_events
251                .push_back(NetworkEvent::SendCdpRequest((method, params)));
252        }
253    }
254
    /// The next event to handle.
    ///
    /// Removes and returns the oldest queued event, or `None` when the queue
    /// is empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
259
    /// Returns the extra HTTP headers currently stored on the manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
263
264    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
265        self.extra_headers = headers;
266        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
267        if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
268            self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
269        }
270    }
271
    /// Queues a `SetBypassServiceWorkerParams` command built from `bypass`.
    ///
    /// NOTE(review): despite the method name, the argument is forwarded
    /// unchanged as the bypass flag, so `true` presumably bypasses service
    /// workers rather than enabling them — confirm against callers.
    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
    }
275
    /// Records whether the user wants request interception and then
    /// re-evaluates whether the fetch domain has to be enabled or disabled.
    pub fn set_request_interception(&mut self, enabled: bool) {
        self.user_request_interception_enabled = enabled;
        self.update_protocol_request_interception();
    }
280
281    pub fn set_cache_enabled(&mut self, enabled: bool) {
282        let run = self.user_cache_disabled != !enabled;
283        self.user_cache_disabled = !enabled;
284        if run {
285            self.update_protocol_cache_disabled();
286        }
287    }
288
    /// Turns off the manager's own handling of paused requests by setting
    /// `protocol_request_interception_enabled` to `true`. No CDP command is
    /// queued here.
    ///
    /// With the flag set, `on_fetch_request_paused` either returns at once
    /// (user interception on) or continues the paused request unchanged
    /// (user interception off), so none of the blocking rules run.
    ///
    /// NOTE(review): the name says "disable" while the flag is set to `true`;
    /// this looks intentional given how the flag is read — confirm.
    pub fn disable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = true;
    }
292
    /// Queues a `SetCacheDisabledParams` command reflecting the current
    /// `user_cache_disabled` flag.
    pub fn update_protocol_cache_disabled(&mut self) {
        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
    }
296
    /// Stores `credentials` and re-evaluates request interception; having
    /// credentials is enough to request that the fetch domain be enabled.
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        self.update_protocol_request_interception()
    }
301
302    fn update_protocol_request_interception(&mut self) {
303        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
304
305        if enabled == self.protocol_request_interception_enabled {
306            return;
307        }
308
309        if enabled {
310            self.push_cdp_request(ENABLE_FETCH.clone())
311        } else {
312            self.push_cdp_request(DisableParams::default())
313        }
314    }
315
316    /// Url matches analytics that we want to ignore or trackers.
317    pub(crate) fn ignore_script(
318        &self,
319        url: &str,
320        block_analytics: bool,
321        intercept_manager: NetworkInterceptManager,
322    ) -> bool {
323        let mut ignore_script = block_analytics && URL_IGNORE_TRIE.contains_prefix(url);
324
325        if !ignore_script {
326            if let Some(index) = url.find("//") {
327                let pos = index + 2;
328
329                // Ensure there is something after `//`
330                if pos < url.len() {
331                    // Find the first slash after the `//`
332                    if let Some(slash_index) = url[pos..].find('/') {
333                        let base_path_index = pos + slash_index + 1;
334
335                        if url.len() > base_path_index {
336                            let new_url: &str = &url[base_path_index..];
337
338                            // ignore assets we do not need for frameworks
339                            if !ignore_script
340                                && intercept_manager == NetworkInterceptManager::Unknown
341                            {
342                                let hydration_file =
343                                    JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
344
345                                // ignore astro paths
346                                if hydration_file && new_url.ends_with(".js") {
347                                    ignore_script = true;
348                                }
349                            }
350
351                            if !ignore_script
352                                && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
353                            {
354                                ignore_script = true;
355                            }
356
357                            if !ignore_script
358                                && self.ignore_visuals
359                                && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
360                            {
361                                ignore_script = true;
362                            }
363                        }
364                    }
365                }
366            }
367        }
368
369        // fallback for file ending in analytics.js
370        if !ignore_script && block_analytics {
371            ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
372        }
373
374        ignore_script
375    }
376
377    /// Determine if the request should be skipped.
378    fn skip_xhr(
379        &self,
380        skip_networking: bool,
381        event: &EventRequestPaused,
382        network_event: bool,
383    ) -> bool {
384        // XHR check
385        if !skip_networking && network_event {
386            let request_url = event.request.url.as_str();
387
388            // check if part of ignore scripts.
389            let skip_analytics =
390                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
391
392            if skip_analytics {
393                true
394            } else if self.block_stylesheets || self.ignore_visuals {
395                let block_css = self.block_stylesheets;
396                let block_media = self.ignore_visuals;
397
398                let mut block_request = false;
399
400                if let Some(position) = request_url.rfind('.') {
401                    let hlen = request_url.len();
402                    let has_asset = hlen - position;
403
404                    if has_asset >= 3 {
405                        let next_position = position + 1;
406
407                        if block_media
408                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
409                                &request_url[next_position..].into(),
410                            )
411                        {
412                            block_request = true;
413                        } else if block_css {
414                            block_request =
415                                CaseInsensitiveString::from(request_url[next_position..].as_bytes())
416                                    .contains(&**CSS_EXTENSION)
417                        }
418                    }
419                }
420
421                if !block_request {
422                    block_request = ignore_script_xhr_media(request_url);
423                }
424
425                block_request
426            } else {
427                skip_networking
428            }
429        } else {
430            skip_networking
431        }
432    }
433
434    #[cfg(not(feature = "adblock"))]
435    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
436        use super::blockers::block_websites::block_website;
437
438        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
439            return;
440        }
441
442        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
443            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
444        } else {
445            if let Some(network_id) = event.network_id.as_ref() {
446                if let Some(request_will_be_sent) =
447                    self.requests_will_be_sent.remove(network_id.as_ref())
448                {
449                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
450                } else {
451                    let current_url = event.request.url.as_str();
452                    let javascript_resource = event.resource_type == ResourceType::Script;
453                    let document_resource = event.resource_type == ResourceType::Document;
454                    let network_resource = !document_resource
455                        && (event.resource_type == ResourceType::Xhr
456                            || event.resource_type == ResourceType::Fetch
457                            || event.resource_type == ResourceType::WebSocket);
458
459                    let skip_networking =
460                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
461
462                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
463                    let mut replacer = None;
464
465                    if document_resource {
466                        if self.document_target_domain == current_url {
467                            // this will prevent the domain from looping (3 times is enough).
468                            self.document_reload_tracker += 1;
469                        } else if !self.document_target_domain.is_empty()
470                            && event.redirected_request_id.is_some()
471                        {
472                            let (http_document_replacement, mut https_document_replacement) =
473                                if self.document_target_domain.starts_with("http://") {
474                                    (
475                                        self.document_target_domain.replace("http://", "http//"),
476                                        self.document_target_domain.replace("http://", "https://"),
477                                    )
478                                } else {
479                                    (
480                                        self.document_target_domain.replace("https://", "https//"),
481                                        self.document_target_domain.replace("https://", "http://"),
482                                    )
483                                };
484
485                            let trailing = https_document_replacement.ends_with('/');
486
487                            if trailing {
488                                https_document_replacement.pop();
489                            }
490
491                            if https_document_replacement.ends_with('/') {
492                                https_document_replacement.pop();
493                            }
494
495                            let redirect_mask = format!(
496                                "{}{}",
497                                https_document_replacement, http_document_replacement
498                            );
499
500                            // handle redirect masking
501                            if current_url == redirect_mask {
502                                replacer = Some(if trailing {
503                                    format!("{}/", https_document_replacement)
504                                } else {
505                                    https_document_replacement
506                                });
507                            }
508                        }
509                        self.document_target_domain = event.request.url.clone();
510                    }
511
512                    let current_url = match &replacer {
513                        Some(r) => r,
514                        _ => &event.request.url,
515                    }
516                    .as_str();
517
518                    // main initial check
519                    let skip_networking = if !skip_networking {
520                        self.ignore_visuals
521                            && (IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()))
522                            || self.block_stylesheets
523                                && ResourceType::Stylesheet == event.resource_type
524                            || self.block_javascript
525                                && javascript_resource
526                                && self.intercept_manager == NetworkInterceptManager::Unknown
527                                && !ALLOWED_MATCHER.is_match(current_url)
528                    } else {
529                        skip_networking
530                    };
531
532                    let skip_networking = if !skip_networking
533                        && (self.only_html || self.ignore_visuals)
534                        && (javascript_resource || document_resource)
535                    {
536                        ignore_script_embedded(current_url)
537                    } else {
538                        skip_networking
539                    };
540
541                    // analytics check
542                    let skip_networking = if !skip_networking && javascript_resource {
543                        self.ignore_script(
544                            current_url,
545                            self.block_analytics,
546                            self.intercept_manager,
547                        )
548                    } else {
549                        skip_networking
550                    };
551
552                    // XHR check
553                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
554
555                    // custom interception layer.
556                    let skip_networking = if !skip_networking
557                        && (javascript_resource || network_resource || document_resource)
558                    {
559                        self.intercept_manager.intercept_detection(
560                            &current_url,
561                            self.ignore_visuals,
562                            network_resource,
563                        )
564                    } else {
565                        skip_networking
566                    };
567
568                    let skip_networking =
569                        if !skip_networking && (javascript_resource || network_resource) {
570                            block_website(&current_url)
571                        } else {
572                            skip_networking
573                        };
574
575                    if skip_networking {
576                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
577                        let fullfill_params =
578                            crate::handler::network::fetch::FulfillRequestParams::new(
579                                event.request_id.clone(),
580                                200,
581                            );
582                        self.push_cdp_request(fullfill_params);
583                    } else {
584                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
585                        let mut continue_params =
586                            ContinueRequestParams::new(event.request_id.clone());
587
588                        if replacer.is_some() {
589                            continue_params.url = Some(current_url.into());
590                            continue_params.intercept_response = Some(false);
591                        }
592
593                        self.push_cdp_request(continue_params)
594                    }
595                }
596            } else {
597                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
598            }
599        }
600    }
601
602    #[cfg(feature = "adblock")]
603    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
604        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
605            return;
606        }
607
608        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
609            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
610        } else {
611            if let Some(network_id) = event.network_id.as_ref() {
612                if let Some(request_will_be_sent) =
613                    self.requests_will_be_sent.remove(network_id.as_ref())
614                {
615                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
616                } else {
617                    let current_url = event.request.url.as_str();
618                    let javascript_resource = event.resource_type == ResourceType::Script;
619                    let document_resource = event.resource_type == ResourceType::Document;
620                    let network_resource = !document_resource
621                        && (event.resource_type == ResourceType::Xhr
622                            || event.resource_type == ResourceType::Fetch
623                            || event.resource_type == ResourceType::WebSocket);
624
625                    // block all of these events.
626                    let skip_networking =
627                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
628
629                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
630
631                    if document_resource {
632                        if self.document_target_domain == current_url {
633                            // this will prevent the domain from looping (3 times is enough).
634                            self.document_reload_tracker += 1;
635                        } else if !self.document_target_domain.is_empty()
636                            && event.redirected_request_id.is_some()
637                        {
638                            let (http_document_replacement, mut https_document_replacement) =
639                                if self.document_target_domain.starts_with("http://") {
640                                    (
641                                        self.document_target_domain.replace("http://", "http//"),
642                                        self.document_target_domain.replace("http://", "https://"),
643                                    )
644                                } else {
645                                    (
646                                        self.document_target_domain.replace("https://", "https//"),
647                                        self.document_target_domain.replace("https://", "http://"),
648                                    )
649                                };
650
651                            let trailing = https_document_replacement.ends_with('/');
652
653                            if trailing {
654                                https_document_replacement.pop();
655                            }
656
657                            if https_document_replacement.ends_with('/') {
658                                https_document_replacement.pop();
659                            }
660
661                            let redirect_mask = format!(
662                                "{}{}",
663                                https_document_replacement, http_document_replacement
664                            );
665
666                            // handle redirect masking
667                            if current_url == redirect_mask {
668                                replacer = Some(if trailing {
669                                    format!("{}/", https_document_replacement)
670                                } else {
671                                    https_document_replacement
672                                });
673                            }
674                        }
675                        self.document_target_domain = event.request.url.clone();
676                    }
677
678                    let current_url = match &replacer {
679                        Some(r) => r,
680                        _ => &event.request.url,
681                    }
682                    .as_str();
683
684                    // main initial check
685                    let skip_networking = if !skip_networking {
686                        self.ignore_visuals
687                            && (IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()))
688                            || self.block_stylesheets
689                                && ResourceType::Stylesheet == event.resource_type
690                            || self.block_javascript
691                                && javascript_resource
692                                && self.intercept_manager == NetworkInterceptManager::Unknown
693                                && !ALLOWED_MATCHER.is_match(current_url)
694                    } else {
695                        skip_networking
696                    };
697
698                    let skip_networking = if !skip_networking {
699                        self.detect_ad(event)
700                    } else {
701                        skip_networking
702                    };
703
704                    let skip_networking = if !skip_networking
705                        && (self.only_html || self.ignore_visuals)
706                        && (javascript_resource || document_resource)
707                    {
708                        ignore_script_embedded(current_url)
709                    } else {
710                        skip_networking
711                    };
712
713                    // analytics check
714                    let skip_networking = if !skip_networking && javascript_resource {
715                        self.ignore_script(
716                            current_url,
717                            self.block_analytics,
718                            self.intercept_manager,
719                        )
720                    } else {
721                        skip_networking
722                    };
723
724                    // XHR check
725                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
726
727                    // custom interception layer.
728                    let skip_networking = if !skip_networking
729                        && (javascript_resource || network_resource || document_resource)
730                    {
731                        self.intercept_manager.intercept_detection(
732                            &event.request.url,
733                            self.ignore_visuals,
734                            network_resource,
735                        )
736                    } else {
737                        skip_networking
738                    };
739
740                    let skip_networking =
741                        if !skip_networking && (javascript_resource || network_resource) {
742                            block_website(&event.request.url)
743                        } else {
744                            skip_networking
745                        };
746
747                    if skip_networking {
748                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
749
750                        let fullfill_params =
751                            crate::handler::network::fetch::FulfillRequestParams::new(
752                                event.request_id.clone(),
753                                200,
754                            );
755                        self.push_cdp_request(fullfill_params);
756                    } else {
757                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
758
759                        let mut continue_params =
760                            ContinueRequestParams::new(event.request_id.clone());
761
762                        if replacer.is_some() {
763                            continue_params.url = Some(current_url.into());
764                            continue_params.intercept_response = Some(false);
765                        }
766                    }
767                }
768            } else {
769                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
770            }
771        }
772
773        // if self.only_html {
774        //     self.made_request = true;
775        // }
776    }
777
778    /// Perform a page intercept for chrome
779    #[cfg(feature = "adblock")]
780    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
781        use adblock::{
782            lists::{FilterSet, ParseOptions, RuleTypes},
783            Engine,
784        };
785
786        lazy_static::lazy_static! {
787            static ref AD_ENGINE: Engine = {
788                let mut filter_set = FilterSet::new(false);
789                let mut rules = ParseOptions::default();
790                rules.rule_types = RuleTypes::All;
791
792                filter_set.add_filters(
793                    &*crate::handler::blockers::adblock_patterns::ADBLOCK_PATTERNS,
794                    rules,
795                );
796
797                Engine::from_filter_set(filter_set, true)
798            };
799        };
800
801        let blockable = ResourceType::Image == event.resource_type
802            || event.resource_type == ResourceType::Media
803            || event.resource_type == ResourceType::Stylesheet
804            || event.resource_type == ResourceType::Document
805            || event.resource_type == ResourceType::Fetch
806            || event.resource_type == ResourceType::Xhr;
807
808        let u = &event.request.url;
809
810        let block_request = blockable
811            // set it to example.com for 3rd party handling is_same_site
812        && {
813            let request = adblock::request::Request::preparsed(
814                 &u,
815                 "example.com",
816                 "example.com",
817                 &event.resource_type.as_ref().to_lowercase(),
818                 !event.request.is_same_site.unwrap_or_default());
819
820            AD_ENGINE.check_network_request(&request).matched
821        };
822
823        block_request
824    }
825
826    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
827        let response = if self
828            .attempted_authentications
829            .contains(event.request_id.as_ref())
830        {
831            AuthChallengeResponseResponse::CancelAuth
832        } else if self.credentials.is_some() {
833            self.attempted_authentications
834                .insert(event.request_id.clone().into());
835            AuthChallengeResponseResponse::ProvideCredentials
836        } else {
837            AuthChallengeResponseResponse::Default
838        };
839
840        let mut auth = AuthChallengeResponse::new(response);
841        if let Some(creds) = self.credentials.clone() {
842            auth.username = Some(creds.username);
843            auth.password = Some(creds.password);
844        }
845        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
846    }
847
848    pub fn set_offline_mode(&mut self, value: bool) {
849        if self.offline == value {
850            return;
851        }
852        self.offline = value;
853        if let Ok(network) = EmulateNetworkConditionsParams::builder()
854            .offline(self.offline)
855            .latency(0)
856            .download_throughput(-1.)
857            .upload_throughput(-1.)
858            .build()
859        {
860            self.push_cdp_request(network);
861        }
862    }
863
864    /// Request interception doesn't happen for data URLs with Network Service.
865    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
866        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
867            if let Some(interception_id) = self
868                .request_id_to_interception_id
869                .remove(event.request_id.as_ref())
870            {
871                self.on_request(event, Some(interception_id));
872            } else {
873                // TODO remove the clone for event
874                self.requests_will_be_sent
875                    .insert(event.request_id.clone(), event.clone());
876            }
877        } else {
878            self.on_request(event, None);
879        }
880    }
881
882    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
883        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
884            request.from_memory_cache = true;
885        }
886    }
887
888    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
889        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
890            request.set_response(event.response.clone());
891            self.queued_events
892                .push_back(NetworkEvent::RequestFinished(request))
893        }
894    }
895
896    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
897        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
898            if let Some(interception_id) = request.interception_id.as_ref() {
899                self.attempted_authentications
900                    .remove(interception_id.as_ref());
901            }
902            self.queued_events
903                .push_back(NetworkEvent::RequestFinished(request));
904        }
905    }
906
907    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
908        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
909            request.failure_text = Some(event.error_text.clone());
910            if let Some(interception_id) = request.interception_id.as_ref() {
911                self.attempted_authentications
912                    .remove(interception_id.as_ref());
913            }
914            self.queued_events
915                .push_back(NetworkEvent::RequestFailed(request));
916        }
917    }
918
919    fn on_request(
920        &mut self,
921        event: &EventRequestWillBeSent,
922        interception_id: Option<InterceptionId>,
923    ) {
924        let mut redirect_chain = Vec::new();
925        let mut redirect_location = None;
926
927        if let Some(redirect_resp) = &event.redirect_response {
928            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
929                if is_redirect_status(redirect_resp.status) {
930                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
931                        if redirect_resp.url != location {
932                            let fixed_location = location.replace(&redirect_resp.url, "");
933                            request.response.as_mut().map(|resp| {
934                                resp.headers.0["Location"] =
935                                    serde_json::Value::String(fixed_location.clone());
936                            });
937                            redirect_location = Some(fixed_location);
938                        }
939                    }
940                }
941
942                self.handle_request_redirect(
943                    &mut request,
944                    if let Some(redirect_location) = redirect_location {
945                        let mut redirect_resp = redirect_resp.clone();
946                        redirect_resp.headers.0["Location"] =
947                            serde_json::Value::String(redirect_location);
948                        redirect_resp
949                    } else {
950                        redirect_resp.clone()
951                    },
952                );
953
954                redirect_chain = std::mem::take(&mut request.redirect_chain);
955                redirect_chain.push(request);
956            }
957        }
958
959        let request = HttpRequest::new(
960            event.request_id.clone(),
961            event.frame_id.clone(),
962            interception_id,
963            self.user_request_interception_enabled,
964            redirect_chain,
965        );
966
967        self.requests.insert(event.request_id.clone(), request);
968        self.queued_events
969            .push_back(NetworkEvent::Request(event.request_id.clone()));
970    }
971
972    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
973        request.set_response(response);
974        if let Some(interception_id) = request.interception_id.as_ref() {
975            self.attempted_authentications
976                .remove(interception_id.as_ref());
977        }
978    }
979}
980
981#[derive(Debug)]
982pub enum NetworkEvent {
983    SendCdpRequest((MethodId, serde_json::Value)),
984    Request(RequestId),
985    Response(RequestId),
986    RequestFailed(HttpRequest),
987    RequestFinished(HttpRequest),
988}