Skip to main content

chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::sync::Arc;
3use std::time::Instant;
4
5use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
6use std::task::{Context, Poll};
7use tokio::sync::oneshot::Sender;
8
9use crate::auth::Credentials;
10use crate::cdp::browser_protocol::target::CloseTargetParams;
11use crate::cmd::CommandChain;
12use crate::cmd::CommandMessage;
13use crate::error::{CdpError, Result};
14use crate::handler::browser::BrowserContext;
15use crate::handler::domworld::DOMWorldKind;
16use crate::handler::emulation::EmulationManager;
17use crate::handler::frame::FrameRequestedNavigation;
18use crate::handler::frame::{
19    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
20};
21use crate::handler::network::{NetworkEvent, NetworkManager};
22use crate::handler::page::PageHandle;
23use crate::handler::viewport::Viewport;
24use crate::handler::{PageInner, REQUEST_TIMEOUT};
25use crate::listeners::{EventListenerRequest, EventListeners};
26use crate::{page::Page, ArcHttpRequest};
27use chromiumoxide_cdp::cdp::browser_protocol::{
28    browser::BrowserContextId,
29    log as cdplog,
30    page::{FrameId, GetFrameTreeParams},
31    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
32};
33use chromiumoxide_cdp::cdp::events::CdpEvent;
34use chromiumoxide_cdp::cdp::js_protocol::runtime::{
35    ExecutionContextId, RunIfWaitingForDebuggerParams,
36};
37use chromiumoxide_cdp::cdp::CdpEventMessage;
38use chromiumoxide_types::{Command, Method, Request, Response};
39use spider_network_blocker::intercept_manager::NetworkInterceptManager;
40use std::time::Duration;
41
/// Drives one initialization stage of a target forward.
///
/// Polls the command chain `$cmds` at `$now` and always returns from the
/// enclosing function:
/// - chain not ready yet: returns `None`;
/// - chain exhausted: stores `$next_state` in `$s.init_state` and polls `$s`
///   again so the next stage starts immediately;
/// - next command available: returns it as a `TargetEvent::Request` carrying
///   the target's session id;
/// - chain reported an error: returns the event produced by
///   `$s.on_initialization_failed()`.
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds: ident, $next_state:expr ) => {{
        return match $cmds.poll($now) {
            Poll::Pending => None,
            Poll::Ready(None) => {
                $s.init_state = $next_state;
                $s.poll($cx, $now)
            }
            Poll::Ready(Some(Ok((method, params)))) => Some(TargetEvent::Request(Request {
                method,
                session_id: $s.session_id.clone().map(Into::into),
                params,
            })),
            Poll::Ready(Some(Err(_))) => Some($s.on_initialization_failed()),
        };
    }};
}
62
63lazy_static::lazy_static! {
64    /// Initial start command params.
65    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
66        if let Ok(attach) = SetAutoAttachParams::builder()
67            .flatten(true)
68            .auto_attach(true)
69            .wait_for_debugger_on_start(true)
70            .build() {
71                let disable_log = cdplog::DisableParams::default();
72
73                let mut cmds =  vec![
74                    (
75                        attach.identifier(),
76                        serde_json::to_value(attach).unwrap_or_default(),
77                    ),
78                    (
79                        disable_log.identifier(),
80                        serde_json::to_value(disable_log).unwrap_or_default(),
81                    )
82                ];
83
84                // enable performance on pages.
85                if cfg!(feature = "collect_metrics") {
86                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
87                    cmds.push((
88                        enable_performance.identifier(),
89                        serde_json::to_value(enable_performance).unwrap_or_default(),
90                    ));
91                }
92
93                cmds
94            } else {
95                vec![]
96            }
97    };
98
99    /// Attach to target commands
100    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
101        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
102
103        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
104    };
105}
106
107#[derive(Debug)]
108pub struct Target {
109    /// Info about this target as returned from the chromium instance
110    info: TargetInfo,
111    /// The type of this target
112    r#type: TargetType,
113    /// Configs for this target
114    config: TargetConfig,
115    /// The context this target is running in
116    browser_context: BrowserContext,
117    /// The frame manager that maintains the state of all frames and handles
118    /// navigations of frames
119    frame_manager: FrameManager,
120    /// Handles all the https
121    pub(crate) network_manager: NetworkManager,
122    emulation_manager: EmulationManager,
123    /// The identifier of the session this target is attached to
124    session_id: Option<SessionId>,
125    /// The handle of the browser page of this target
126    page: Option<PageHandle>,
127    /// Drives this target towards initialization
128    pub(crate) init_state: TargetInit,
129    /// Currently queued events to report to the `Handler`
130    queued_events: VecDeque<TargetEvent>,
131    /// All registered event subscriptions
132    event_listeners: EventListeners,
133    /// Senders that need to be notified once the main frame has loaded
134    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
135    /// Senders that need to be notified once the main frame reaches `networkIdle`.
136    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
137    /// (Optional) for `networkAlmostIdle` if you want it as well.
138    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
139    /// The sender who requested the page.
140    initiator: Option<Sender<Result<Page>>>,
141}
142
143impl Target {
144    /// Create a new target instance with `TargetInfo` after a
145    /// `CreateTargetParams` request.
146    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
147        let ty = TargetType::new(&info.r#type);
148        let request_timeout: Duration = config.request_timeout;
149        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
150
151        if !config.cache_enabled {
152            network_manager.set_cache_enabled(false);
153        }
154
155        if !config.service_worker_enabled {
156            network_manager.set_service_worker_enabled(true);
157        }
158
159        network_manager.set_request_interception(config.request_intercept);
160        network_manager.max_bytes_allowed = config.max_bytes_allowed;
161
162        if let Some(headers) = &config.extra_headers {
163            network_manager.set_extra_headers(headers.clone());
164        }
165
166        if let Some(whitelist) = &config.whitelist_patterns {
167            network_manager.set_whitelist_patterns(whitelist.clone());
168        }
169
170        if let Some(blacklist) = &config.blacklist_patterns {
171            network_manager.set_blacklist_patterns(blacklist);
172        }
173
174        network_manager.ignore_visuals = config.ignore_visuals;
175        network_manager.block_javascript = config.ignore_javascript;
176        network_manager.block_analytics = config.ignore_analytics;
177        network_manager.block_prefetch = config.ignore_prefetch;
178
179        network_manager.block_stylesheets = config.ignore_stylesheets;
180        network_manager.only_html = config.only_html;
181        network_manager.intercept_manager = config.intercept_manager;
182
183        #[cfg(feature = "adblock")]
184        if let Some(rules) = &config.adblock_filter_rules {
185            use adblock::lists::{FilterSet, ParseOptions, RuleTypes};
186
187            let mut filter_set = FilterSet::new(false);
188            let mut opts = ParseOptions::default();
189            opts.rule_types = RuleTypes::All;
190
191            // Include built-in patterns.
192            filter_set.add_filters(
193                &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
194                opts.clone(),
195            );
196            // Merge user-supplied rules (e.g. EasyList / EasyPrivacy content).
197            filter_set.add_filters(rules.iter().map(|s| s.as_str()), opts);
198
199            let engine = adblock::Engine::from_filter_set(filter_set, true);
200            network_manager.set_adblock_engine(std::sync::Arc::new(engine));
201        }
202
203        Self {
204            info,
205            r#type: ty,
206            config,
207            frame_manager: FrameManager::new(request_timeout),
208            network_manager,
209            emulation_manager: EmulationManager::new(request_timeout),
210            session_id: None,
211            page: None,
212            init_state: TargetInit::AttachToTarget,
213            wait_for_frame_navigation: Default::default(),
214            wait_for_network_idle: Default::default(),
215            wait_for_network_almost_idle: Default::default(),
216            queued_events: Default::default(),
217            event_listeners: Default::default(),
218            initiator: None,
219            browser_context,
220        }
221    }
222
223    /// Set the session id.
224    pub fn set_session_id(&mut self, id: SessionId) {
225        self.session_id = Some(id)
226    }
227
228    /// Get the session id.
229    pub fn session_id(&self) -> Option<&SessionId> {
230        self.session_id.as_ref()
231    }
232
233    /// Get the session id mut.
234    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
235        &mut self.session_id
236    }
237
238    /// Get the browser context.
239    pub fn browser_context(&self) -> &BrowserContext {
240        &self.browser_context
241    }
242
243    /// The identifier for this target
244    pub fn target_id(&self) -> &TargetId {
245        &self.info.target_id
246    }
247
248    /// The type of this target
249    pub fn r#type(&self) -> &TargetType {
250        &self.r#type
251    }
252
253    /// Whether this target is already initialized
254    pub fn is_initialized(&self) -> bool {
255        matches!(self.init_state, TargetInit::Initialized)
256    }
257
258    /// Navigate a frame
259    pub fn goto(&mut self, req: FrameRequestedNavigation) {
260        if self.network_manager.has_target_domain() {
261            self.network_manager.clear_target_domain();
262            let goto_url = req
263                .req
264                .params
265                .as_object()
266                .and_then(|o| o.get("url"))
267                .and_then(|v| v.as_str());
268
269            if let Some(url) = goto_url {
270                self.network_manager.set_page_url(url.into());
271            }
272        }
273        self.frame_manager.goto(req);
274    }
275
276    /// Create a new page from the session.
277    fn create_page(&mut self) {
278        if self.page.is_none() {
279            if let Some(session) = self.session_id.clone() {
280                let handle = PageHandle::new(
281                    self.target_id().clone(),
282                    session,
283                    self.opener_id().cloned(),
284                    self.config.request_timeout,
285                );
286                self.page = Some(handle);
287            }
288        }
289    }
290
291    /// Tries to create the `PageInner` if this target is already initialized
292    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
293        self.create_page();
294        self.page.as_ref().map(|p| p.inner())
295    }
296
297    /// Is the target a page?
298    pub fn is_page(&self) -> bool {
299        self.r#type().is_page()
300    }
301
302    /// The browser context ID.
303    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
304        self.info.browser_context_id.as_ref()
305    }
306
307    /// The target connection info.
308    pub fn info(&self) -> &TargetInfo {
309        &self.info
310    }
311
312    /// Get the target that opened this target. Top-level targets return `None`.
313    pub fn opener_id(&self) -> Option<&TargetId> {
314        self.info.opener_id.as_ref()
315    }
316
317    pub fn frame_manager(&self) -> &FrameManager {
318        &self.frame_manager
319    }
320
321    /// The frame manager.
322    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
323        &mut self.frame_manager
324    }
325
326    /// Get event listeners mutably.
327    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
328        &mut self.event_listeners
329    }
330
331    /// Received a response to a command issued by this target
332    pub fn on_response(&mut self, resp: Response, method: &str) {
333        if let Some(cmds) = self.init_state.commands_mut() {
334            cmds.received_response(method);
335        }
336
337        if let GetFrameTreeParams::IDENTIFIER = method {
338            if let Some(resp) = resp
339                .result
340                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
341            {
342                self.frame_manager.on_frame_tree(resp.frame_tree);
343            }
344        }
345        // requests originated from the network manager all return an empty response, hence they
346        // can be ignored here
347    }
348
349    /// On CDP Event message.
350    pub fn on_event(&mut self, event: CdpEventMessage) {
351        let CdpEventMessage {
352            params,
353            method,
354            session_id,
355            ..
356        } = event;
357
358        let is_session_scoped = matches!(
359            params,
360            CdpEvent::FetchRequestPaused(_)
361                | CdpEvent::FetchAuthRequired(_)
362                | CdpEvent::NetworkRequestWillBeSent(_)
363                | CdpEvent::NetworkResponseReceived(_)
364                | CdpEvent::NetworkLoadingFinished(_)
365                | CdpEvent::NetworkLoadingFailed(_)
366                | CdpEvent::PageFrameAttached(_)
367                | CdpEvent::PageFrameDetached(_)
368                | CdpEvent::PageFrameNavigated(_)
369                | CdpEvent::PageNavigatedWithinDocument(_)
370                | CdpEvent::PageLifecycleEvent(_)
371                | CdpEvent::PageFrameStartedLoading(_)
372                | CdpEvent::PageFrameStoppedLoading(_)
373                | CdpEvent::RuntimeExecutionContextCreated(_)
374                | CdpEvent::RuntimeExecutionContextDestroyed(_)
375                | CdpEvent::RuntimeExecutionContextsCleared(_)
376                | CdpEvent::RuntimeBindingCalled(_)
377        );
378
379        if is_session_scoped {
380            let ev_sid: &str = match session_id.as_deref() {
381                Some(s) => s,
382                None => return,
383            };
384
385            let self_sid: &str = match self.session_id.as_ref() {
386                Some(sid) => sid.as_ref(),
387                None => return,
388            };
389
390            if self_sid != ev_sid {
391                return;
392            }
393        }
394
395        match &params {
396            // `FrameManager` events
397            CdpEvent::PageFrameAttached(ev) => self
398                .frame_manager
399                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
400            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
401            CdpEvent::PageFrameNavigated(ev) => {
402                self.frame_manager.on_frame_navigated(&ev.frame);
403            }
404            CdpEvent::PageNavigatedWithinDocument(ev) => {
405                self.frame_manager.on_frame_navigated_within_document(ev)
406            }
407            CdpEvent::RuntimeExecutionContextCreated(ev) => {
408                self.frame_manager.on_frame_execution_context_created(ev)
409            }
410            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
411                self.frame_manager.on_frame_execution_context_destroyed(ev)
412            }
413            CdpEvent::RuntimeExecutionContextsCleared(_) => {
414                self.frame_manager.on_execution_contexts_cleared()
415            }
416            CdpEvent::RuntimeBindingCalled(ev) => {
417                // TODO check if binding registered and payload is json
418                self.frame_manager.on_runtime_binding_called(ev)
419            }
420            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
421            CdpEvent::PageFrameStartedLoading(ev) => {
422                self.frame_manager.on_frame_started_loading(ev);
423            }
424            CdpEvent::PageFrameStoppedLoading(ev) => {
425                self.frame_manager.on_frame_stopped_loading(ev);
426            }
427            // `Target` events
428            CdpEvent::TargetAttachedToTarget(ev) => {
429                if ev.waiting_for_debugger {
430                    let runtime_cmd = ATTACH_TARGET.clone();
431
432                    self.queued_events.push_back(TargetEvent::Request(Request {
433                        method: runtime_cmd.0,
434                        session_id: Some(ev.session_id.clone().into()),
435                        params: runtime_cmd.1,
436                    }));
437                }
438
439                if "service_worker" == &ev.target_info.r#type {
440                    let detach_command = DetachFromTargetParams::builder()
441                        .session_id(ev.session_id.clone())
442                        .build();
443
444                    let method = detach_command.identifier();
445
446                    if let Ok(params) = serde_json::to_value(detach_command) {
447                        self.queued_events.push_back(TargetEvent::Request(Request {
448                            method,
449                            session_id: self.session_id.clone().map(Into::into),
450                            params,
451                        }));
452                    }
453                }
454            }
455            // `NetworkManager` events
456            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
457            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
458            CdpEvent::NetworkRequestWillBeSent(ev) => {
459                self.network_manager.on_request_will_be_sent(ev)
460            }
461            CdpEvent::NetworkRequestServedFromCache(ev) => {
462                self.network_manager.on_request_served_from_cache(ev)
463            }
464            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
465            CdpEvent::NetworkLoadingFinished(ev) => {
466                self.network_manager.on_network_loading_finished(ev)
467            }
468            CdpEvent::NetworkLoadingFailed(ev) => {
469                self.network_manager.on_network_loading_failed(ev)
470            }
471            _ => (),
472        }
473        chromiumoxide_cdp::consume_event!(match params {
474           |ev| self.event_listeners.start_send(ev),
475           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
476        });
477    }
478
479    /// Called when a init command timed out
480    fn on_initialization_failed(&mut self) -> TargetEvent {
481        if let Some(initiator) = self.initiator.take() {
482            let _ = initiator.send(Err(CdpError::Timeout));
483        }
484        self.init_state = TargetInit::Closing;
485        let close_target = CloseTargetParams::new(self.info.target_id.clone());
486
487        TargetEvent::Request(Request {
488            method: close_target.identifier(),
489            session_id: self.session_id.clone().map(Into::into),
490            params: serde_json::to_value(close_target).unwrap_or_default(),
491        })
492    }
493
494    /// Advance that target's state
495    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
496        if !self.is_page() {
497            // can only poll pages
498            return None;
499        }
500
501        match &mut self.init_state {
502            TargetInit::AttachToTarget => {
503                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
504                    self.config.request_timeout,
505                ));
506
507                if let Ok(params) = AttachToTargetParams::builder()
508                    .target_id(self.target_id().clone())
509                    .flatten(true)
510                    .build()
511                {
512                    return Some(TargetEvent::Request(Request::new(
513                        params.identifier(),
514                        serde_json::to_value(params).unwrap_or_default(),
515                    )));
516                } else {
517                    return None;
518                }
519            }
520            TargetInit::InitializingFrame(cmds) => {
521                self.session_id.as_ref()?;
522                if let Poll::Ready(poll) = cmds.poll(now) {
523                    return match poll {
524                        None => {
525                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
526                                let world_name = world_name.clone();
527
528                                if let Some(isolated_world_cmds) =
529                                    self.frame_manager.ensure_isolated_world(&world_name)
530                                {
531                                    *cmds = isolated_world_cmds;
532                                } else {
533                                    self.init_state = TargetInit::InitializingNetwork(
534                                        self.network_manager.init_commands(),
535                                    );
536                                }
537                            } else {
538                                self.init_state = TargetInit::InitializingNetwork(
539                                    self.network_manager.init_commands(),
540                                );
541                            }
542                            self.poll(cx, now)
543                        }
544                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
545                            method,
546                            session_id: self.session_id.clone().map(Into::into),
547                            params,
548                        })),
549                        Some(Err(_)) => Some(self.on_initialization_failed()),
550                    };
551                } else {
552                    return None;
553                }
554            }
555            TargetInit::InitializingNetwork(cmds) => {
556                advance_state!(
557                    self,
558                    cx,
559                    now,
560                    cmds,
561                    TargetInit::InitializingPage(Self::page_init_commands(
562                        self.config.request_timeout
563                    ))
564                );
565            }
566            TargetInit::InitializingPage(cmds) => {
567                advance_state!(
568                    self,
569                    cx,
570                    now,
571                    cmds,
572                    match self.config.viewport.as_ref() {
573                        Some(viewport) => TargetInit::InitializingEmulation(
574                            self.emulation_manager.init_commands(viewport)
575                        ),
576                        None => TargetInit::Initialized,
577                    }
578                );
579            }
580            TargetInit::InitializingEmulation(cmds) => {
581                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
582            }
583            TargetInit::Initialized => {
584                if let Some(initiator) = self.initiator.take() {
585                    // make sure that the main frame of the page has finished loading
586                    if self
587                        .frame_manager
588                        .main_frame()
589                        .map(|frame| frame.is_loaded())
590                        .unwrap_or_default()
591                    {
592                        if let Some(page) = self.get_or_create_page() {
593                            let _ = initiator.send(Ok(page.clone().into()));
594                        } else {
595                            self.initiator = Some(initiator);
596                        }
597                    } else {
598                        self.initiator = Some(initiator);
599                    }
600                }
601            }
602            TargetInit::Closing => return None,
603        };
604
605        // Prune senders whose receivers have been dropped (caller
606        // timed out or was cancelled) so the vecs don't grow unbounded.
607        // Done once per poll() call, outside the inner loop.
608        self.wait_for_frame_navigation.retain(|tx| !tx.is_closed());
609        self.wait_for_network_idle.retain(|tx| !tx.is_closed());
610        self.wait_for_network_almost_idle
611            .retain(|tx| !tx.is_closed());
612
613        loop {
614            if self.init_state == TargetInit::Closing {
615                break None;
616            }
617
618            if let Some(frame) = self.frame_manager.main_frame() {
619                if frame.is_loaded() {
620                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
621                        let _ = tx.send(frame.http_request().cloned());
622                    }
623                }
624
625                if frame.is_network_idle() {
626                    while let Some(tx) = self.wait_for_network_idle.pop() {
627                        let _ = tx.send(frame.http_request().cloned());
628                    }
629                }
630
631                if frame.is_network_almost_idle() {
632                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
633                        let _ = tx.send(frame.http_request().cloned());
634                    }
635                }
636            }
637
638            // Drain queued messages first.
639            if let Some(ev) = self.queued_events.pop_front() {
640                return Some(ev);
641            }
642
643            if let Some(handle) = self.page.as_mut() {
644                // Budget: don't drain more than 64 messages per poll to avoid
645                // starving other targets and the websocket section in the
646                // handler's main loop.
647                let mut recv_budget = 64usize;
648                while let Poll::Ready(Some(msg)) = handle.rx.poll_recv(cx) {
649                    if self.init_state == TargetInit::Closing {
650                        break;
651                    }
652
653                    match msg {
654                        TargetMessage::Command(cmd) => {
655                            if cmd.method == "Network.setBlockedURLs" {
656                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
657                                {
658                                    let mut unblock_all = false;
659                                    let mut block_all = false;
660
661                                    for s in arr.iter().filter_map(|v| v.as_str()) {
662                                        if s == "!*" {
663                                            unblock_all = true;
664                                            break; // "!*" overrides any block rules
665                                        }
666                                        if s.contains('*') {
667                                            block_all = true;
668                                        }
669                                    }
670
671                                    if unblock_all {
672                                        self.network_manager.set_block_all(false);
673                                    } else if block_all {
674                                        self.network_manager.set_block_all(true);
675                                    }
676                                }
677                            }
678                            self.queued_events.push_back(TargetEvent::Command(cmd));
679                        }
680                        TargetMessage::MainFrame(tx) => {
681                            let _ =
682                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
683                        }
684                        TargetMessage::AllFrames(tx) => {
685                            let _ = tx.send(
686                                self.frame_manager
687                                    .frames()
688                                    .map(|f| f.id().clone())
689                                    .collect(),
690                            );
691                        }
692                        #[cfg(feature = "_cache")]
693                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
694                            self.network_manager.set_cache_site_key(cache_key);
695                            self.network_manager.set_cache_policy(cache_policy);
696                        }
697                        TargetMessage::Url(req) => {
698                            let GetUrl { frame_id, tx } = req;
699                            let frame = if let Some(frame_id) = frame_id {
700                                self.frame_manager.frame(&frame_id)
701                            } else {
702                                self.frame_manager.main_frame()
703                            };
704                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
705                        }
706                        TargetMessage::Name(req) => {
707                            let GetName { frame_id, tx } = req;
708                            let frame = if let Some(frame_id) = frame_id {
709                                self.frame_manager.frame(&frame_id)
710                            } else {
711                                self.frame_manager.main_frame()
712                            };
713                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
714                        }
715                        TargetMessage::Parent(req) => {
716                            let GetParent { frame_id, tx } = req;
717                            let frame = self.frame_manager.frame(&frame_id);
718                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
719                        }
720                        TargetMessage::WaitForNavigation(tx) => {
721                            if let Some(frame) = self.frame_manager.main_frame() {
722                                if frame.is_loaded() {
723                                    let _ = tx.send(frame.http_request().cloned());
724                                } else {
725                                    self.wait_for_frame_navigation.push(tx);
726                                }
727                            } else {
728                                self.wait_for_frame_navigation.push(tx);
729                            }
730                        }
731                        TargetMessage::WaitForNetworkIdle(tx) => {
732                            if let Some(frame) = self.frame_manager.main_frame() {
733                                if frame.is_network_idle() {
734                                    let _ = tx.send(frame.http_request().cloned());
735                                } else {
736                                    self.wait_for_network_idle.push(tx);
737                                }
738                            } else {
739                                self.wait_for_network_idle.push(tx);
740                            }
741                        }
742                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
743                            if let Some(frame) = self.frame_manager.main_frame() {
744                                if frame.is_network_almost_idle() {
745                                    let _ = tx.send(frame.http_request().cloned());
746                                } else {
747                                    self.wait_for_network_almost_idle.push(tx);
748                                }
749                            } else {
750                                self.wait_for_network_almost_idle.push(tx);
751                            }
752                        }
753                        TargetMessage::AddEventListener(req) => {
754                            if req.method == "Fetch.requestPaused" {
755                                self.network_manager.enable_request_intercept();
756                            }
757                            // register a new listener
758                            self.event_listeners.add_listener(req);
759                        }
760                        TargetMessage::GetExecutionContext(ctx) => {
761                            let GetExecutionContext {
762                                dom_world,
763                                frame_id,
764                                tx,
765                            } = ctx;
766                            let frame = if let Some(frame_id) = frame_id {
767                                self.frame_manager.frame(&frame_id)
768                            } else {
769                                self.frame_manager.main_frame()
770                            };
771
772                            if let Some(frame) = frame {
773                                match dom_world {
774                                    DOMWorldKind::Main => {
775                                        let _ = tx.send(frame.main_world().execution_context());
776                                    }
777                                    DOMWorldKind::Secondary => {
778                                        let _ =
779                                            tx.send(frame.secondary_world().execution_context());
780                                    }
781                                }
782                            } else {
783                                let _ = tx.send(None);
784                            }
785                        }
786                        TargetMessage::Authenticate(credentials) => {
787                            self.network_manager.authenticate(credentials);
788                        }
789                        TargetMessage::BlockNetwork(blocked) => {
790                            self.network_manager.set_block_all(blocked);
791                        }
792                        TargetMessage::EnableInterception(enabled) => {
793                            // if interception is enabled disable the user facing handling.
794                            self.network_manager.user_request_interception_enabled = !enabled;
795                        }
796                    }
797
798                    recv_budget -= 1;
799                    if recv_budget == 0 {
800                        break;
801                    }
802                }
803            }
804
805            while let Some(event) = self.network_manager.poll() {
806                if self.init_state == TargetInit::Closing {
807                    break;
808                }
809                match event {
810                    NetworkEvent::SendCdpRequest((method, params)) => {
811                        // send a message to the browser
812                        self.queued_events.push_back(TargetEvent::Request(Request {
813                            method,
814                            session_id: self.session_id.clone().map(Into::into),
815                            params,
816                        }))
817                    }
818                    NetworkEvent::Request(_) => {}
819                    NetworkEvent::Response(_) => {}
820                    NetworkEvent::RequestFailed(request) => {
821                        self.frame_manager.on_http_request_finished(request);
822                    }
823                    NetworkEvent::RequestFinished(request) => {
824                        self.frame_manager.on_http_request_finished(request);
825                    }
826                    NetworkEvent::BytesConsumed(n) => {
827                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
828                    }
829                }
830            }
831
832            while let Some(event) = self.frame_manager.poll(now) {
833                if self.init_state == TargetInit::Closing {
834                    break;
835                }
836                match event {
837                    FrameEvent::NavigationResult(res) => {
838                        self.queued_events
839                            .push_back(TargetEvent::NavigationResult(res));
840                    }
841                    FrameEvent::NavigationRequest(id, req) => {
842                        self.queued_events
843                            .push_back(TargetEvent::NavigationRequest(id, req));
844                    }
845                }
846            }
847
848            if self.queued_events.is_empty() {
849                return None;
850            }
851        }
852    }
853
854    /// Set the sender half of the channel who requested the creation of this
855    /// target
856    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
857        self.initiator = Some(tx);
858    }
859
860    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
861        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
862    }
863}
864
865/// Configuration for how a single target/page should be fetched and processed.
866#[derive(Debug, Clone)]
867pub struct TargetConfig {
868    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
869    /// When `true`, connections will proceed even if certificate validation fails.
870    pub ignore_https_errors: bool,
871    /// Request timeout to use for the main navigation / resource fetch.
872    /// This is the total time allowed before a request is considered failed.
873    pub request_timeout: Duration,
874    /// Optional browser viewport to use for this target.
875    /// When `None`, the default viewport (or headless browser default) is used.
876    pub viewport: Option<Viewport>,
877    /// Enable request interception for this target.
878    /// When `true`, all network requests will pass through the intercept manager.
879    pub request_intercept: bool,
880    /// Enable caching for this target.
881    /// When `true`, responses may be read from and written to the cache layer.
882    pub cache_enabled: bool,
883    /// If `true`, skip visual/asset resources that are not required for HTML content
884    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
885    pub ignore_visuals: bool,
886    /// If `true`, block JavaScript execution (or avoid loading JS resources)
887    /// for this target. This is useful for purely static HTML crawls.
888    pub ignore_javascript: bool,
889    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
890    /// common tracker domains, etc.).
891    pub ignore_analytics: bool,
892    /// Ignore prefetching.
893    pub ignore_prefetch: bool,
894    /// If `true`, block stylesheets and related CSS resources for this target.
895    /// This can reduce bandwidth when only raw HTML is needed.
896    pub ignore_stylesheets: bool,
897    /// If `true`, only HTML documents will be fetched/kept.
898    /// Non-HTML subresources may be skipped entirely.
899    pub only_html: bool,
900    /// Whether service workers are allowed for this target.
901    /// When `true`, service workers may register and intercept requests.
902    pub service_worker_enabled: bool,
903    /// Extra HTTP headers to send with each request for this target.
904    /// Keys should be header names, values their corresponding header values.
905    pub extra_headers: Option<std::collections::HashMap<String, String>>,
906    /// Network intercept manager used to make allow/deny/modify decisions
907    /// for requests when `request_intercept` is enabled.
908    pub intercept_manager: NetworkInterceptManager,
909    /// The maximum number of response bytes allowed for this target.
910    /// When set, responses larger than this limit may be truncated or aborted.
911    pub max_bytes_allowed: Option<u64>,
912    /// Whitelist patterns to allow through the network.
913    pub whitelist_patterns: Option<Vec<String>>,
914    /// Blacklist patterns to black through the network.
915    pub blacklist_patterns: Option<Vec<String>>,
916    /// Extra ABP/uBO filter rules for the adblock engine.
917    #[cfg(feature = "adblock")]
918    pub adblock_filter_rules: Option<Vec<String>>,
919}
920
921impl Default for TargetConfig {
922    fn default() -> Self {
923        Self {
924            ignore_https_errors: true,
925            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
926            viewport: Default::default(),
927            request_intercept: false,
928            cache_enabled: true,
929            service_worker_enabled: true,
930            ignore_javascript: false,
931            ignore_visuals: false,
932            ignore_stylesheets: false,
933            ignore_analytics: true,
934            ignore_prefetch: true,
935            only_html: false,
936            extra_headers: Default::default(),
937            intercept_manager: NetworkInterceptManager::Unknown,
938            max_bytes_allowed: None,
939            whitelist_patterns: None,
940            blacklist_patterns: None,
941            #[cfg(feature = "adblock")]
942            adblock_filter_rules: None,
943        }
944    }
945}
946
/// The kind of a target, as parsed from its type string by
/// [`TargetType::new`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Parsed from `"page"`.
    Page,
    /// Parsed from `"background_page"`.
    BackgroundPage,
    /// Parsed from `"service_worker"`.
    ServiceWorker,
    /// Parsed from `"shared_worker"`.
    SharedWorker,
    /// Parsed from `"other"`.
    Other,
    /// Parsed from `"browser"`.
    Browser,
    /// Parsed from `"webview"`.
    Webview,
    /// Any type string without a dedicated variant; holds that string.
    Unknown(String),
}

impl TargetType {
    /// Maps a target type string onto its variant.
    ///
    /// Matching is exact and case-sensitive. A string that is not one of the
    /// known type names is kept verbatim in [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognized => Self::Unknown(unrecognized.to_owned()),
        }
    }

    /// Returns `true` for [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        *self == Self::Page
    }

    /// Returns `true` for [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        *self == Self::BackgroundPage
    }

    /// Returns `true` for [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        *self == Self::ServiceWorker
    }

    /// Returns `true` for [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        *self == Self::SharedWorker
    }

    /// Returns `true` for [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        *self == Self::Other
    }

    /// Returns `true` for [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        *self == Self::Browser
    }

    /// Returns `true` for [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        *self == Self::Webview
    }
}
1001
/// Events a target pushes onto its `queued_events` queue while it is polled.
#[derive(Debug)]
pub(crate) enum TargetEvent {
    /// An internal request
    Request(Request),
    /// An internal navigation request
    NavigationRequest(NavigationId, Request),
    /// Indicates that a previous requested navigation has finished
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A new command arrived via a channel
    Command(CommandMessage),
    /// The bytes consumed by the network.
    BytesConsumed(u64),
}
1015
1016// TODO this can be moved into the classes?
1017#[derive(Debug, PartialEq)]
1018pub enum TargetInit {
1019    InitializingFrame(CommandChain),
1020    InitializingNetwork(CommandChain),
1021    InitializingPage(CommandChain),
1022    InitializingEmulation(CommandChain),
1023    AttachToTarget,
1024    Initialized,
1025    Closing,
1026}
1027
1028impl TargetInit {
1029    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
1030        match self {
1031            TargetInit::InitializingFrame(cmd) => Some(cmd),
1032            TargetInit::InitializingNetwork(cmd) => Some(cmd),
1033            TargetInit::InitializingPage(cmd) => Some(cmd),
1034            TargetInit::InitializingEmulation(cmd) => Some(cmd),
1035            TargetInit::AttachToTarget => None,
1036            TargetInit::Initialized => None,
1037            TargetInit::Closing => None,
1038        }
1039    }
1040}
1041
1042#[derive(Debug)]
1043pub struct GetExecutionContext {
1044    /// For which world the execution context was requested
1045    pub dom_world: DOMWorldKind,
1046    /// The if of the frame to get the `ExecutionContext` for
1047    pub frame_id: Option<FrameId>,
1048    /// Sender half of the channel to send the response back
1049    pub tx: Sender<Option<ExecutionContextId>>,
1050}
1051
1052impl GetExecutionContext {
1053    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
1054        Self {
1055            dom_world: DOMWorldKind::Main,
1056            frame_id: None,
1057            tx,
1058        }
1059    }
1060}
1061
1062#[derive(Debug)]
1063pub struct GetUrl {
1064    /// The id of the frame to get the url for (None = main frame)
1065    pub frame_id: Option<FrameId>,
1066    /// Sender half of the channel to send the response back
1067    pub tx: Sender<Option<String>>,
1068}
1069
1070impl GetUrl {
1071    pub fn new(tx: Sender<Option<String>>) -> Self {
1072        Self { frame_id: None, tx }
1073    }
1074}
1075
/// Request for the name of a frame; the answer is `None` when the frame is
/// not found or has no name.
#[derive(Debug)]
pub struct GetName {
    /// The id of the frame to get the name for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<String>>,
}
1083
/// Request for the parent frame id of a frame; the answer is `None` when the
/// frame is not found or has no parent id.
#[derive(Debug)]
pub struct GetParent {
    /// The id of the frame to get the parent for (always required; there is
    /// no main-frame fallback)
    pub frame_id: FrameId,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<FrameId>>,
}
1091
/// Messages handled by a target. Variants that carry a `Sender` (directly or
/// inside their request struct) are answered over that sender.
#[derive(Debug)]
pub enum TargetMessage {
    /// Execute a command within the session of this target
    Command(CommandMessage),
    /// Return the main frame of this target's page
    MainFrame(Sender<Option<FrameId>>),
    /// Return all the frames of this target's page
    AllFrames(Sender<Vec<FrameId>>),
    #[cfg(feature = "_cache")]
    /// Set the cache key and policy for the target page.
    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
    /// Return the url if available
    Url(GetUrl),
    /// Return the name if available
    Name(GetName),
    /// Return the parent id of a frame
    Parent(GetParent),
    /// A Message that resolves when the frame finished loading a new url.
    /// Answered immediately if the main frame is already loaded, otherwise
    /// the sender is queued.
    WaitForNavigation(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is idle.
    /// Answered immediately if the main frame is already network idle,
    /// otherwise the sender is queued.
    WaitForNetworkIdle(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is almost idle.
    /// Answered immediately if the main frame is already almost idle,
    /// otherwise the sender is queued.
    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
    /// A request to submit a new listener that gets notified with every
    /// received event. A listener for `Fetch.requestPaused` also enables
    /// request interception on the network manager.
    AddEventListener(EventListenerRequest),
    /// Get the `ExecutionContext` if available
    GetExecutionContext(GetExecutionContext),
    /// Pass the credentials to the network manager for authentication
    Authenticate(Credentials),
    /// Set block/unblocked networking
    BlockNetwork(bool),
    /// Enable/Disable internal request paused interception; the user facing
    /// interception flag is set to the negation of this value
    EnableInterception(bool),
}