chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::pin::Pin;
3use std::sync::Arc;
4use std::time::Instant;
5
6use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
7use futures::channel::oneshot::Sender;
8use futures::stream::Stream;
9use futures::task::{Context, Poll};
10
11use crate::auth::Credentials;
12use crate::cdp::browser_protocol::target::CloseTargetParams;
13use crate::cmd::CommandChain;
14use crate::cmd::CommandMessage;
15use crate::error::{CdpError, Result};
16use crate::handler::browser::BrowserContext;
17use crate::handler::domworld::DOMWorldKind;
18use crate::handler::emulation::EmulationManager;
19use crate::handler::frame::FrameRequestedNavigation;
20use crate::handler::frame::{
21    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
22};
23use crate::handler::network::{NetworkEvent, NetworkManager};
24use crate::handler::page::PageHandle;
25use crate::handler::viewport::Viewport;
26use crate::handler::{PageInner, REQUEST_TIMEOUT};
27use crate::listeners::{EventListenerRequest, EventListeners};
28use crate::{page::Page, ArcHttpRequest};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    browser::BrowserContextId,
31    log as cdplog,
32    page::{FrameId, GetFrameTreeParams},
33    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
34};
35use chromiumoxide_cdp::cdp::events::CdpEvent;
36use chromiumoxide_cdp::cdp::js_protocol::runtime::{
37    ExecutionContextId, RunIfWaitingForDebuggerParams,
38};
39use chromiumoxide_cdp::cdp::CdpEventMessage;
40use chromiumoxide_types::{Command, Method, Request, Response};
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42use std::time::Duration;
43
/// Drives one initialization stage of a `Target` and returns from the
/// enclosing function with the outcome.
///
/// Arguments: the target (`$s`), the task context (`$cx`), the current instant
/// (`$now`), the command chain of the active stage (`$cmds`) and the state to
/// switch to once that chain is exhausted (`$next_state`).
///
/// Outcome of polling `$cmds`:
/// * pending                -> returns `None`
/// * chain exhausted        -> stores `$next_state` and re-polls the target
/// * next command available -> returns it as a `TargetEvent::Request` scoped
///   to the target's session
/// * error                  -> returns the event produced by
///   `on_initialization_failed`
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds: ident, $next_state:expr ) => {{
        return match $cmds.poll($now) {
            Poll::Pending => None,
            Poll::Ready(None) => {
                // Current stage is done: move on and immediately drive the next one.
                $s.init_state = $next_state;
                $s.poll($cx, $now)
            }
            Poll::Ready(Some(Ok((method, params)))) => Some(TargetEvent::Request(Request {
                method,
                session_id: $s.session_id.clone().map(Into::into),
                params,
            })),
            Poll::Ready(Some(Err(_))) => Some($s.on_initialization_failed()),
        };
    }};
}
64
65lazy_static::lazy_static! {
66    /// Initial start command params.
67    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
68        if let Ok(attach) = SetAutoAttachParams::builder()
69            .flatten(true)
70            .auto_attach(true)
71            .wait_for_debugger_on_start(true)
72            .build() {
73                let disable_log = cdplog::DisableParams::default();
74
75                let mut cmds =  vec![
76                    (
77                        attach.identifier(),
78                        serde_json::to_value(attach).unwrap_or_default(),
79                    ),
80                    (
81                        disable_log.identifier(),
82                        serde_json::to_value(disable_log).unwrap_or_default(),
83                    )
84                ];
85
86                // enable performance on pages.
87                if cfg!(feature = "collect_metrics") {
88                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
89                    cmds.push((
90                        enable_performance.identifier(),
91                        serde_json::to_value(enable_performance).unwrap_or_default(),
92                    ));
93                }
94
95                cmds
96            } else {
97                vec![]
98            }
99    };
100
101    /// Attach to target commands
102    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
103        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
104
105        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
106    };
107}
108
109#[derive(Debug)]
110pub struct Target {
111    /// Info about this target as returned from the chromium instance
112    info: TargetInfo,
113    /// The type of this target
114    r#type: TargetType,
115    /// Configs for this target
116    config: TargetConfig,
117    /// The context this target is running in
118    browser_context: BrowserContext,
119    /// The frame manager that maintains the state of all frames and handles
120    /// navigations of frames
121    frame_manager: FrameManager,
122    /// Handles all the https
123    pub(crate) network_manager: NetworkManager,
124    emulation_manager: EmulationManager,
125    /// The identifier of the session this target is attached to
126    session_id: Option<SessionId>,
127    /// The handle of the browser page of this target
128    page: Option<PageHandle>,
129    /// Drives this target towards initialization
130    pub(crate) init_state: TargetInit,
131    /// Currently queued events to report to the `Handler`
132    queued_events: VecDeque<TargetEvent>,
133    /// All registered event subscriptions
134    event_listeners: EventListeners,
135    /// Senders that need to be notified once the main frame has loaded
136    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
137    /// Senders that need to be notified once the main frame reaches `networkIdle`.
138    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
139    /// (Optional) for `networkAlmostIdle` if you want it as well.
140    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
141    /// The sender who requested the page.
142    initiator: Option<Sender<Result<Page>>>,
143}
144
145impl Target {
146    /// Create a new target instance with `TargetInfo` after a
147    /// `CreateTargetParams` request.
148    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
149        let ty = TargetType::new(&info.r#type);
150        let request_timeout: Duration = config.request_timeout;
151        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
152
153        if !config.cache_enabled {
154            network_manager.set_cache_enabled(false);
155        }
156
157        if !config.service_worker_enabled {
158            network_manager.set_service_worker_enabled(true);
159        }
160
161        network_manager.set_request_interception(config.request_intercept);
162        network_manager.max_bytes_allowed = config.max_bytes_allowed;
163
164        if let Some(headers) = &config.extra_headers {
165            network_manager.set_extra_headers(headers.clone());
166        }
167
168        if let Some(white_list) = &config.whitelist_patterns {
169            network_manager.set_whitelist_patterns(white_list.clone());
170        }
171
172        network_manager.ignore_visuals = config.ignore_visuals;
173        network_manager.block_javascript = config.ignore_javascript;
174        network_manager.block_analytics = config.ignore_analytics;
175        network_manager.block_stylesheets = config.ignore_stylesheets;
176        network_manager.only_html = config.only_html;
177        network_manager.intercept_manager = config.intercept_manager;
178
179        Self {
180            info,
181            r#type: ty,
182            config,
183            frame_manager: FrameManager::new(request_timeout),
184            network_manager,
185            emulation_manager: EmulationManager::new(request_timeout),
186            session_id: None,
187            page: None,
188            init_state: TargetInit::AttachToTarget,
189            wait_for_frame_navigation: Default::default(),
190            wait_for_network_idle: Default::default(),
191            wait_for_network_almost_idle: Default::default(),
192            queued_events: Default::default(),
193            event_listeners: Default::default(),
194            initiator: None,
195            browser_context,
196        }
197    }
198
199    /// Set the session id.
200    pub fn set_session_id(&mut self, id: SessionId) {
201        self.session_id = Some(id)
202    }
203
204    /// Get the session id.
205    pub fn session_id(&self) -> Option<&SessionId> {
206        self.session_id.as_ref()
207    }
208
209    /// Get the session id mut.
210    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
211        &mut self.session_id
212    }
213
214    /// Get the browser context.
215    pub fn browser_context(&self) -> &BrowserContext {
216        &self.browser_context
217    }
218
219    /// The identifier for this target
220    pub fn target_id(&self) -> &TargetId {
221        &self.info.target_id
222    }
223
224    /// The type of this target
225    pub fn r#type(&self) -> &TargetType {
226        &self.r#type
227    }
228
229    /// Whether this target is already initialized
230    pub fn is_initialized(&self) -> bool {
231        matches!(self.init_state, TargetInit::Initialized)
232    }
233
234    /// Navigate a frame
235    pub fn goto(&mut self, req: FrameRequestedNavigation) {
236        if self.network_manager.has_target_domain() {
237            self.network_manager.clear_target_domain();
238            let goto_url = req
239                .req
240                .params
241                .as_object()
242                .and_then(|o| o.get("url"))
243                .and_then(|v| v.as_str());
244
245            if let Some(url) = goto_url {
246                self.network_manager.set_page_url(url.into());
247            }
248        }
249        self.frame_manager.goto(req);
250    }
251
252    /// Create a new page from the session.
253    fn create_page(&mut self) {
254        if self.page.is_none() {
255            if let Some(session) = self.session_id.clone() {
256                let handle =
257                    PageHandle::new(self.target_id().clone(), session, self.opener_id().cloned());
258                self.page = Some(handle);
259            }
260        }
261    }
262
263    /// Tries to create the `PageInner` if this target is already initialized
264    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
265        self.create_page();
266        self.page.as_ref().map(|p| p.inner())
267    }
268
269    /// Is the target a page?
270    pub fn is_page(&self) -> bool {
271        self.r#type().is_page()
272    }
273
274    /// The browser context ID.
275    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
276        self.info.browser_context_id.as_ref()
277    }
278
279    /// The target connection info.
280    pub fn info(&self) -> &TargetInfo {
281        &self.info
282    }
283
284    /// Get the target that opened this target. Top-level targets return `None`.
285    pub fn opener_id(&self) -> Option<&TargetId> {
286        self.info.opener_id.as_ref()
287    }
288
289    pub fn frame_manager(&self) -> &FrameManager {
290        &self.frame_manager
291    }
292
293    /// The frame manager.
294    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
295        &mut self.frame_manager
296    }
297
298    /// Get event listeners mutably.
299    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
300        &mut self.event_listeners
301    }
302
303    /// Received a response to a command issued by this target
304    pub fn on_response(&mut self, resp: Response, method: &str) {
305        if let Some(cmds) = self.init_state.commands_mut() {
306            cmds.received_response(method);
307        }
308
309        if let GetFrameTreeParams::IDENTIFIER = method {
310            if let Some(resp) = resp
311                .result
312                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
313            {
314                self.frame_manager.on_frame_tree(resp.frame_tree);
315            }
316        }
317        // requests originated from the network manager all return an empty response, hence they
318        // can be ignored here
319    }
320
321    /// On CDP Event message.
322    pub fn on_event(&mut self, event: CdpEventMessage) {
323        let CdpEventMessage {
324            params,
325            method,
326            session_id,
327            ..
328        } = event;
329
330        let is_session_scoped = matches!(
331            params,
332            CdpEvent::FetchRequestPaused(_)
333                | CdpEvent::FetchAuthRequired(_)
334                | CdpEvent::NetworkRequestWillBeSent(_)
335                | CdpEvent::NetworkResponseReceived(_)
336                | CdpEvent::NetworkLoadingFinished(_)
337                | CdpEvent::NetworkLoadingFailed(_)
338                | CdpEvent::PageFrameAttached(_)
339                | CdpEvent::PageFrameDetached(_)
340                | CdpEvent::PageFrameNavigated(_)
341                | CdpEvent::PageNavigatedWithinDocument(_)
342                | CdpEvent::PageLifecycleEvent(_)
343                | CdpEvent::PageFrameStartedLoading(_)
344                | CdpEvent::PageFrameStoppedLoading(_)
345                | CdpEvent::RuntimeExecutionContextCreated(_)
346                | CdpEvent::RuntimeExecutionContextDestroyed(_)
347                | CdpEvent::RuntimeExecutionContextsCleared(_)
348                | CdpEvent::RuntimeBindingCalled(_)
349        );
350
351        if is_session_scoped {
352            let ev_sid: &str = match session_id.as_deref() {
353                Some(s) => s,
354                None => return,
355            };
356
357            let self_sid: &str = match self.session_id.as_ref() {
358                Some(sid) => sid.as_ref(),
359                None => return,
360            };
361
362            if self_sid != ev_sid {
363                return;
364            }
365        }
366
367        match &params {
368            // `FrameManager` events
369            CdpEvent::PageFrameAttached(ev) => self
370                .frame_manager
371                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
372            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
373            CdpEvent::PageFrameNavigated(ev) => {
374                self.frame_manager.on_frame_navigated(&ev.frame);
375            }
376            CdpEvent::PageNavigatedWithinDocument(ev) => {
377                self.frame_manager.on_frame_navigated_within_document(ev)
378            }
379            CdpEvent::RuntimeExecutionContextCreated(ev) => {
380                self.frame_manager.on_frame_execution_context_created(ev)
381            }
382            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
383                self.frame_manager.on_frame_execution_context_destroyed(ev)
384            }
385            CdpEvent::RuntimeExecutionContextsCleared(_) => {
386                self.frame_manager.on_execution_contexts_cleared()
387            }
388            CdpEvent::RuntimeBindingCalled(ev) => {
389                // TODO check if binding registered and payload is json
390                self.frame_manager.on_runtime_binding_called(ev)
391            }
392            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
393            CdpEvent::PageFrameStartedLoading(ev) => {
394                self.frame_manager.on_frame_started_loading(ev);
395            }
396            CdpEvent::PageFrameStoppedLoading(ev) => {
397                self.frame_manager.on_frame_stopped_loading(ev);
398            }
399            // `Target` events
400            CdpEvent::TargetAttachedToTarget(ev) => {
401                if ev.waiting_for_debugger {
402                    let runtime_cmd = ATTACH_TARGET.clone();
403
404                    self.queued_events.push_back(TargetEvent::Request(Request {
405                        method: runtime_cmd.0,
406                        session_id: Some(ev.session_id.clone().into()),
407                        params: runtime_cmd.1,
408                    }));
409                }
410
411                if "service_worker" == &ev.target_info.r#type {
412                    let detach_command = DetachFromTargetParams::builder()
413                        .session_id(ev.session_id.clone())
414                        .build();
415
416                    let method = detach_command.identifier();
417
418                    if let Ok(params) = serde_json::to_value(detach_command) {
419                        self.queued_events.push_back(TargetEvent::Request(Request {
420                            method,
421                            session_id: self.session_id.clone().map(Into::into),
422                            params,
423                        }));
424                    }
425                }
426            }
427            // `NetworkManager` events
428            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
429            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
430            CdpEvent::NetworkRequestWillBeSent(ev) => {
431                self.network_manager.on_request_will_be_sent(ev)
432            }
433            CdpEvent::NetworkRequestServedFromCache(ev) => {
434                self.network_manager.on_request_served_from_cache(ev)
435            }
436            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
437            CdpEvent::NetworkLoadingFinished(ev) => {
438                self.network_manager.on_network_loading_finished(ev)
439            }
440            CdpEvent::NetworkLoadingFailed(ev) => {
441                self.network_manager.on_network_loading_failed(ev)
442            }
443            _ => (),
444        }
445        chromiumoxide_cdp::consume_event!(match params {
446           |ev| self.event_listeners.start_send(ev),
447           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
448        });
449    }
450
451    /// Called when a init command timed out
452    fn on_initialization_failed(&mut self) -> TargetEvent {
453        if let Some(initiator) = self.initiator.take() {
454            let _ = initiator.send(Err(CdpError::Timeout));
455        }
456        self.init_state = TargetInit::Closing;
457        let close_target = CloseTargetParams::new(self.info.target_id.clone());
458
459        TargetEvent::Request(Request {
460            method: close_target.identifier(),
461            session_id: self.session_id.clone().map(Into::into),
462            params: serde_json::to_value(close_target).unwrap_or_default(),
463        })
464    }
465
466    /// Advance that target's state
467    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
468        if !self.is_page() {
469            // can only poll pages
470            return None;
471        }
472
473        match &mut self.init_state {
474            TargetInit::AttachToTarget => {
475                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
476                    self.config.request_timeout,
477                ));
478
479                if let Ok(params) = AttachToTargetParams::builder()
480                    .target_id(self.target_id().clone())
481                    .flatten(true)
482                    .build()
483                {
484                    return Some(TargetEvent::Request(Request::new(
485                        params.identifier(),
486                        serde_json::to_value(params).unwrap_or_default(),
487                    )));
488                } else {
489                    return None;
490                }
491            }
492            TargetInit::InitializingFrame(cmds) => {
493                self.session_id.as_ref()?;
494                if let Poll::Ready(poll) = cmds.poll(now) {
495                    return match poll {
496                        None => {
497                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
498                                let world_name = world_name.clone();
499
500                                if let Some(isolated_world_cmds) =
501                                    self.frame_manager.ensure_isolated_world(&world_name)
502                                {
503                                    *cmds = isolated_world_cmds;
504                                } else {
505                                    self.init_state = TargetInit::InitializingNetwork(
506                                        self.network_manager.init_commands(),
507                                    );
508                                }
509                            } else {
510                                self.init_state = TargetInit::InitializingNetwork(
511                                    self.network_manager.init_commands(),
512                                );
513                            }
514                            self.poll(cx, now)
515                        }
516                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
517                            method,
518                            session_id: self.session_id.clone().map(Into::into),
519                            params,
520                        })),
521                        Some(Err(_)) => Some(self.on_initialization_failed()),
522                    };
523                } else {
524                    return None;
525                }
526            }
527            TargetInit::InitializingNetwork(cmds) => {
528                advance_state!(
529                    self,
530                    cx,
531                    now,
532                    cmds,
533                    TargetInit::InitializingPage(Self::page_init_commands(
534                        self.config.request_timeout
535                    ))
536                );
537            }
538            TargetInit::InitializingPage(cmds) => {
539                advance_state!(
540                    self,
541                    cx,
542                    now,
543                    cmds,
544                    match self.config.viewport.as_ref() {
545                        Some(viewport) => TargetInit::InitializingEmulation(
546                            self.emulation_manager.init_commands(viewport)
547                        ),
548                        None => TargetInit::Initialized,
549                    }
550                );
551            }
552            TargetInit::InitializingEmulation(cmds) => {
553                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
554            }
555            TargetInit::Initialized => {
556                if let Some(initiator) = self.initiator.take() {
557                    // make sure that the main frame of the page has finished loading
558                    if self
559                        .frame_manager
560                        .main_frame()
561                        .map(|frame| frame.is_loaded())
562                        .unwrap_or_default()
563                    {
564                        if let Some(page) = self.get_or_create_page() {
565                            let _ = initiator.send(Ok(page.clone().into()));
566                        } else {
567                            self.initiator = Some(initiator);
568                        }
569                    } else {
570                        self.initiator = Some(initiator);
571                    }
572                }
573            }
574            TargetInit::Closing => return None,
575        };
576
577        loop {
578            if self.init_state == TargetInit::Closing {
579                break None;
580            }
581
582            if let Some(frame) = self.frame_manager.main_frame() {
583                if frame.is_loaded() {
584                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
585                        let _ = tx.send(frame.http_request().cloned());
586                    }
587                }
588
589                if frame.is_network_idle() {
590                    while let Some(tx) = self.wait_for_network_idle.pop() {
591                        let _ = tx.send(frame.http_request().cloned());
592                    }
593                }
594
595                if frame.is_network_almost_idle() {
596                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
597                        let _ = tx.send(frame.http_request().cloned());
598                    }
599                }
600            }
601
602            // Drain queued messages first.
603            if let Some(ev) = self.queued_events.pop_front() {
604                return Some(ev);
605            }
606
607            if let Some(handle) = self.page.as_mut() {
608                while let Poll::Ready(Some(msg)) = Pin::new(&mut handle.rx).poll_next(cx) {
609                    if self.init_state == TargetInit::Closing {
610                        break;
611                    }
612
613                    match msg {
614                        TargetMessage::Command(cmd) => {
615                            if cmd.method == "Network.setBlockedURLs" {
616                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
617                                {
618                                    let mut unblock_all = false;
619                                    let mut block_all = false;
620
621                                    for s in arr.iter().filter_map(|v| v.as_str()) {
622                                        if s == "!*" {
623                                            unblock_all = true;
624                                            break; // "!*" overrides any block rules
625                                        }
626                                        if s.contains('*') {
627                                            block_all = true;
628                                        }
629                                    }
630
631                                    if unblock_all {
632                                        self.network_manager.set_block_all(false);
633                                    } else if block_all {
634                                        self.network_manager.set_block_all(true);
635                                    }
636                                }
637                            }
638                            self.queued_events.push_back(TargetEvent::Command(cmd));
639                        }
640                        TargetMessage::MainFrame(tx) => {
641                            let _ =
642                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
643                        }
644                        TargetMessage::AllFrames(tx) => {
645                            let _ = tx.send(
646                                self.frame_manager
647                                    .frames()
648                                    .map(|f| f.id().clone())
649                                    .collect(),
650                            );
651                        }
652                        #[cfg(feature = "_cache")]
653                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
654                            self.network_manager.set_cache_site_key(cache_key);
655                            self.network_manager.set_cache_policy(cache_policy);
656                        }
657                        TargetMessage::Url(req) => {
658                            let GetUrl { frame_id, tx } = req;
659                            let frame = if let Some(frame_id) = frame_id {
660                                self.frame_manager.frame(&frame_id)
661                            } else {
662                                self.frame_manager.main_frame()
663                            };
664                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
665                        }
666                        TargetMessage::Name(req) => {
667                            let GetName { frame_id, tx } = req;
668                            let frame = if let Some(frame_id) = frame_id {
669                                self.frame_manager.frame(&frame_id)
670                            } else {
671                                self.frame_manager.main_frame()
672                            };
673                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
674                        }
675                        TargetMessage::Parent(req) => {
676                            let GetParent { frame_id, tx } = req;
677                            let frame = self.frame_manager.frame(&frame_id);
678                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
679                        }
680                        TargetMessage::WaitForNavigation(tx) => {
681                            if let Some(frame) = self.frame_manager.main_frame() {
682                                // TODO submit a navigation watcher: waitForFrameNavigation
683
684                                // TODO return the watchers navigationResponse
685                                if frame.is_loaded() {
686                                    let _ = tx.send(frame.http_request().cloned());
687                                } else {
688                                    self.wait_for_frame_navigation.push(tx);
689                                }
690                            } else {
691                                self.wait_for_frame_navigation.push(tx);
692                            }
693                        }
694                        TargetMessage::WaitForNetworkIdle(tx) => {
695                            if let Some(frame) = self.frame_manager.main_frame() {
696                                if frame.is_network_idle() {
697                                    let _ = tx.send(frame.http_request().cloned());
698                                } else {
699                                    self.wait_for_network_idle.push(tx);
700                                }
701                            } else {
702                                self.wait_for_network_idle.push(tx);
703                            }
704                        }
705                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
706                            if let Some(frame) = self.frame_manager.main_frame() {
707                                if frame.is_network_almost_idle() {
708                                    let _ = tx.send(frame.http_request().cloned());
709                                } else {
710                                    self.wait_for_network_almost_idle.push(tx);
711                                }
712                            } else {
713                                self.wait_for_network_almost_idle.push(tx);
714                            }
715                        }
716                        TargetMessage::AddEventListener(req) => {
717                            if req.method == "Fetch.requestPaused" {
718                                self.network_manager.enable_request_intercept();
719                            }
720                            // register a new listener
721                            self.event_listeners.add_listener(req);
722                        }
723                        TargetMessage::GetExecutionContext(ctx) => {
724                            let GetExecutionContext {
725                                dom_world,
726                                frame_id,
727                                tx,
728                            } = ctx;
729                            let frame = if let Some(frame_id) = frame_id {
730                                self.frame_manager.frame(&frame_id)
731                            } else {
732                                self.frame_manager.main_frame()
733                            };
734
735                            if let Some(frame) = frame {
736                                match dom_world {
737                                    DOMWorldKind::Main => {
738                                        let _ = tx.send(frame.main_world().execution_context());
739                                    }
740                                    DOMWorldKind::Secondary => {
741                                        let _ =
742                                            tx.send(frame.secondary_world().execution_context());
743                                    }
744                                }
745                            } else {
746                                let _ = tx.send(None);
747                            }
748                        }
749                        TargetMessage::Authenticate(credentials) => {
750                            self.network_manager.authenticate(credentials);
751                        }
752                        TargetMessage::BlockNetwork(blocked) => {
753                            self.network_manager.set_block_all(blocked);
754                        }
755                    }
756                }
757            }
758
759            while let Some(event) = self.network_manager.poll() {
760                if self.init_state == TargetInit::Closing {
761                    break;
762                }
763                match event {
764                    NetworkEvent::SendCdpRequest((method, params)) => {
765                        // send a message to the browser
766                        self.queued_events.push_back(TargetEvent::Request(Request {
767                            method,
768                            session_id: self.session_id.clone().map(Into::into),
769                            params,
770                        }))
771                    }
772                    NetworkEvent::Request(_) => {}
773                    NetworkEvent::Response(_) => {}
774                    NetworkEvent::RequestFailed(request) => {
775                        self.frame_manager.on_http_request_finished(request);
776                    }
777                    NetworkEvent::RequestFinished(request) => {
778                        self.frame_manager.on_http_request_finished(request);
779                    }
780                    NetworkEvent::BytesConsumed(n) => {
781                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
782                    }
783                }
784            }
785
786            while let Some(event) = self.frame_manager.poll(now) {
787                if self.init_state == TargetInit::Closing {
788                    break;
789                }
790                match event {
791                    FrameEvent::NavigationResult(res) => {
792                        self.queued_events
793                            .push_back(TargetEvent::NavigationResult(res));
794                    }
795                    FrameEvent::NavigationRequest(id, req) => {
796                        self.queued_events
797                            .push_back(TargetEvent::NavigationRequest(id, req));
798                    }
799                }
800            }
801
802            if self.queued_events.is_empty() {
803                return None;
804            }
805        }
806    }
807
808    /// Set the sender half of the channel who requested the creation of this
809    /// target
810    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
811        self.initiator = Some(tx);
812    }
813
814    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
815        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
816    }
817}
818
819/// Configuration for how a single target/page should be fetched and processed.
820#[derive(Debug, Clone)]
821pub struct TargetConfig {
822    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
823    /// When `true`, connections will proceed even if certificate validation fails.
824    pub ignore_https_errors: bool,
825    /// Request timeout to use for the main navigation / resource fetch.
826    /// This is the total time allowed before a request is considered failed.
827    pub request_timeout: Duration,
828    /// Optional browser viewport to use for this target.
829    /// When `None`, the default viewport (or headless browser default) is used.
830    pub viewport: Option<Viewport>,
831    /// Enable request interception for this target.
832    /// When `true`, all network requests will pass through the intercept manager.
833    pub request_intercept: bool,
834    /// Enable caching for this target.
835    /// When `true`, responses may be read from and written to the cache layer.
836    pub cache_enabled: bool,
837    /// If `true`, skip visual/asset resources that are not required for HTML content
838    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
839    pub ignore_visuals: bool,
840    /// If `true`, block JavaScript execution (or avoid loading JS resources)
841    /// for this target. This is useful for purely static HTML crawls.
842    pub ignore_javascript: bool,
843    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
844    /// common tracker domains, etc.).
845    pub ignore_analytics: bool,
846    /// If `true`, block stylesheets and related CSS resources for this target.
847    /// This can reduce bandwidth when only raw HTML is needed.
848    pub ignore_stylesheets: bool,
849    /// If `true`, only HTML documents will be fetched/kept.
850    /// Non-HTML subresources may be skipped entirely.
851    pub only_html: bool,
852    /// Whether service workers are allowed for this target.
853    /// When `true`, service workers may register and intercept requests.
854    pub service_worker_enabled: bool,
855    /// Extra HTTP headers to send with each request for this target.
856    /// Keys should be header names, values their corresponding header values.
857    pub extra_headers: Option<std::collections::HashMap<String, String>>,
858    /// Network intercept manager used to make allow/deny/modify decisions
859    /// for requests when `request_intercept` is enabled.
860    pub intercept_manager: NetworkInterceptManager,
861    /// The maximum number of response bytes allowed for this target.
862    /// When set, responses larger than this limit may be truncated or aborted.
863    pub max_bytes_allowed: Option<u64>,
864    /// Whitelist patterns to allow through the network.
865    pub whitelist_patterns: Option<Vec<String>>,
866}
867
868impl Default for TargetConfig {
869    fn default() -> Self {
870        Self {
871            ignore_https_errors: true,
872            request_timeout: Duration::from_secs(REQUEST_TIMEOUT),
873            viewport: Default::default(),
874            request_intercept: false,
875            cache_enabled: true,
876            service_worker_enabled: true,
877            ignore_javascript: false,
878            ignore_visuals: false,
879            ignore_stylesheets: false,
880            ignore_analytics: true,
881            only_html: false,
882            extra_headers: Default::default(),
883            intercept_manager: NetworkInterceptManager::Unknown,
884            max_bytes_allowed: None,
885            whitelist_patterns: None,
886        }
887    }
888}
889
/// The kind of a target, parsed from a target type string.
///
/// Type strings without a dedicated variant are kept verbatim in
/// [`TargetType::Unknown`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Type string `"page"`.
    Page,
    /// Type string `"background_page"`.
    BackgroundPage,
    /// Type string `"service_worker"`.
    ServiceWorker,
    /// Type string `"shared_worker"`.
    SharedWorker,
    /// Type string `"other"`.
    Other,
    /// Type string `"browser"`.
    Browser,
    /// Type string `"webview"`.
    Webview,
    /// Any other type string, stored unchanged.
    Unknown(String),
}

impl TargetType {
    /// Maps a target type string onto its variant.
    ///
    /// Matching is exact (and therefore case-sensitive); every string that is
    /// not one of the known names ends up in [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognized => Self::Unknown(unrecognized.to_owned()),
        }
    }

    /// `true` if this is [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        *self == Self::Page
    }

    /// `true` if this is [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        *self == Self::BackgroundPage
    }

    /// `true` if this is [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        *self == Self::ServiceWorker
    }

    /// `true` if this is [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        *self == Self::SharedWorker
    }

    /// `true` if this is [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        *self == Self::Other
    }

    /// `true` if this is [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        *self == Self::Browser
    }

    /// `true` if this is [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        *self == Self::Webview
    }
}
944
/// Events a target pushes onto its `queued_events` queue while it is polled.
#[derive(Debug)]
pub(crate) enum TargetEvent {
    /// An internal request, e.g. a CDP request emitted by the network manager
    Request(Request),
    /// An internal navigation request, forwarded from the frame manager
    NavigationRequest(NavigationId, Request),
    /// Indicates that a previous requested navigation has finished
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A new command arrived via a channel
    Command(CommandMessage),
    /// The bytes consumed by the network, as reported by the network manager.
    BytesConsumed(u64),
}
958
959// TODO this can be moved into the classes?
960#[derive(Debug, PartialEq)]
961pub enum TargetInit {
962    InitializingFrame(CommandChain),
963    InitializingNetwork(CommandChain),
964    InitializingPage(CommandChain),
965    InitializingEmulation(CommandChain),
966    AttachToTarget,
967    Initialized,
968    Closing,
969}
970
971impl TargetInit {
972    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
973        match self {
974            TargetInit::InitializingFrame(cmd) => Some(cmd),
975            TargetInit::InitializingNetwork(cmd) => Some(cmd),
976            TargetInit::InitializingPage(cmd) => Some(cmd),
977            TargetInit::InitializingEmulation(cmd) => Some(cmd),
978            TargetInit::AttachToTarget => None,
979            TargetInit::Initialized => None,
980            TargetInit::Closing => None,
981        }
982    }
983}
984
985#[derive(Debug)]
986pub struct GetExecutionContext {
987    /// For which world the execution context was requested
988    pub dom_world: DOMWorldKind,
989    /// The if of the frame to get the `ExecutionContext` for
990    pub frame_id: Option<FrameId>,
991    /// Sender half of the channel to send the response back
992    pub tx: Sender<Option<ExecutionContextId>>,
993}
994
995impl GetExecutionContext {
996    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
997        Self {
998            dom_world: DOMWorldKind::Main,
999            frame_id: None,
1000            tx,
1001        }
1002    }
1003}
1004
1005#[derive(Debug)]
1006pub struct GetUrl {
1007    /// The id of the frame to get the url for (None = main frame)
1008    pub frame_id: Option<FrameId>,
1009    /// Sender half of the channel to send the response back
1010    pub tx: Sender<Option<String>>,
1011}
1012
1013impl GetUrl {
1014    pub fn new(tx: Sender<Option<String>>) -> Self {
1015        Self { frame_id: None, tx }
1016    }
1017}
1018
/// Request for the name of a frame; payload of `TargetMessage::Name`.
#[derive(Debug)]
pub struct GetName {
    /// The id of the frame to get the name for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<String>>,
}
1026
/// Request for the parent of a frame; payload of `TargetMessage::Parent`.
#[derive(Debug)]
pub struct GetParent {
    /// The id of the frame to get the parent for. Unlike the other frame
    /// requests this id is mandatory, there is no main-frame fallback.
    pub frame_id: FrameId,
    /// Sender half of the channel to send the response (the parent's id, if
    /// any) back
    pub tx: Sender<Option<FrameId>>,
}
1034
/// Messages handled by a target while it is polled.
#[derive(Debug)]
pub enum TargetMessage {
    /// Execute a command within the session of this target
    Command(CommandMessage),
    /// Return the main frame of this target's page
    MainFrame(Sender<Option<FrameId>>),
    /// Return all the frames of this target's page
    AllFrames(Sender<Vec<FrameId>>),
    #[cfg(feature = "_cache")]
    /// Set the cache key and policy for the target page.
    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
    /// Return the url if available
    Url(GetUrl),
    /// Return the name if available
    Name(GetName),
    /// Return the parent id of a frame
    Parent(GetParent),
    /// A Message that resolves when the frame finished loading a new url
    WaitForNavigation(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is idle.
    ///
    /// Answered immediately if the main frame is already network idle,
    /// otherwise the sender is parked until later.
    WaitForNetworkIdle(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is almost idle.
    ///
    /// Answered immediately if the main frame is already almost idle,
    /// otherwise the sender is parked until later.
    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
    /// A request to submit a new listener that gets notified with every
    /// received event.
    ///
    /// Registering a listener for `Fetch.requestPaused` also enables request
    /// interception on the network manager.
    AddEventListener(EventListenerRequest),
    /// Get the `ExecutionContext` if available
    GetExecutionContext(GetExecutionContext),
    /// Hand the given credentials to the network manager for authentication
    Authenticate(Credentials),
    /// Set block/unblocked networking
    BlockNetwork(bool),
}