chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::pin::Pin;
3use std::sync::Arc;
4use std::time::Instant;
5
6use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
7use futures::channel::oneshot::Sender;
8use futures::stream::Stream;
9use futures::task::{Context, Poll};
10
11use crate::auth::Credentials;
12use crate::cdp::browser_protocol::target::CloseTargetParams;
13use crate::cmd::CommandChain;
14use crate::cmd::CommandMessage;
15use crate::error::{CdpError, Result};
16use crate::handler::browser::BrowserContext;
17use crate::handler::domworld::DOMWorldKind;
18use crate::handler::emulation::EmulationManager;
19use crate::handler::frame::FrameRequestedNavigation;
20use crate::handler::frame::{
21    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
22};
23use crate::handler::network::{NetworkEvent, NetworkManager};
24use crate::handler::page::PageHandle;
25use crate::handler::viewport::Viewport;
26use crate::handler::{PageInner, REQUEST_TIMEOUT};
27use crate::listeners::{EventListenerRequest, EventListeners};
28use crate::{page::Page, ArcHttpRequest};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    browser::BrowserContextId,
31    log as cdplog,
32    page::{FrameId, GetFrameTreeParams},
33    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
34};
35use chromiumoxide_cdp::cdp::events::CdpEvent;
36use chromiumoxide_cdp::cdp::js_protocol::runtime::{
37    ExecutionContextId, RunIfWaitingForDebuggerParams,
38};
39use chromiumoxide_cdp::cdp::CdpEventMessage;
40use chromiumoxide_types::{Command, Method, Request, Response};
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42use std::time::Duration;
43
/// Polls the command chain `$cmds` of the current init state and returns from
/// the enclosing function with the outcome:
///
/// * chain not ready: returns `None`
/// * chain exhausted: stores `$next_state` in `$s.init_state` and polls `$s` again
/// * next command available: returns it as a `TargetEvent::Request` that
///   carries the session id of `$s`
/// * chain reported an error: returns `$s.on_initialization_failed()`
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds: ident, $next_state:expr ) => {{
        match $cmds.poll($now) {
            Poll::Pending => return None,
            Poll::Ready(None) => {
                // every command of this stage was answered, move to the next stage
                $s.init_state = $next_state;
                return $s.poll($cx, $now);
            }
            Poll::Ready(Some(Ok((method, params)))) => {
                return Some(TargetEvent::Request(Request {
                    method,
                    session_id: $s.session_id.clone().map(Into::into),
                    params,
                }));
            }
            Poll::Ready(Some(Err(_))) => return Some($s.on_initialization_failed()),
        }
    }};
}
64
65lazy_static::lazy_static! {
66    /// Initial start command params.
67    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
68        if let Ok(attach) = SetAutoAttachParams::builder()
69            .flatten(true)
70            .auto_attach(true)
71            .wait_for_debugger_on_start(true)
72            .build() {
73                let disable_log = cdplog::DisableParams::default();
74                let mut cmds =  vec![
75                    (
76                        attach.identifier(),
77                        serde_json::to_value(attach).unwrap_or_default(),
78                    ),
79                    (
80                        disable_log.identifier(),
81                        serde_json::to_value(disable_log).unwrap_or_default(),
82                    )
83                ];
84
85                // enable performance on pages.
86                if cfg!(feature = "collect_metrics") {
87                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
88                    cmds.push((
89                        enable_performance.identifier(),
90                        serde_json::to_value(enable_performance).unwrap_or_default(),
91                    ));
92                }
93
94                cmds
95            } else {
96                vec![]
97            }
98    };
99
100    /// Attach to target commands
101    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
102        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
103
104        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
105    };
106}
107
108#[derive(Debug)]
109pub struct Target {
110    /// Info about this target as returned from the chromium instance
111    info: TargetInfo,
112    /// The type of this target
113    r#type: TargetType,
114    /// Configs for this target
115    config: TargetConfig,
116    /// The context this target is running in
117    browser_context: BrowserContext,
118    /// The frame manager that maintains the state of all frames and handles
119    /// navigations of frames
120    frame_manager: FrameManager,
121    /// Handles all the https
122    pub(crate) network_manager: NetworkManager,
123    emulation_manager: EmulationManager,
124    /// The identifier of the session this target is attached to
125    session_id: Option<SessionId>,
126    /// The handle of the browser page of this target
127    page: Option<PageHandle>,
128    /// Drives this target towards initialization
129    pub(crate) init_state: TargetInit,
130    /// Currently queued events to report to the `Handler`
131    queued_events: VecDeque<TargetEvent>,
132    /// All registered event subscriptions
133    event_listeners: EventListeners,
134    /// Senders that need to be notified once the main frame has loaded
135    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
136    /// Senders that need to be notified once the main frame reaches `networkIdle`.
137    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
138    /// (Optional) for `networkAlmostIdle` if you want it as well.
139    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
140    /// The sender who requested the page.
141    initiator: Option<Sender<Result<Page>>>,
142}
143
144impl Target {
145    /// Create a new target instance with `TargetInfo` after a
146    /// `CreateTargetParams` request.
147    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
148        let ty = TargetType::new(&info.r#type);
149        let request_timeout: Duration = config.request_timeout;
150        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
151
152        if !config.cache_enabled {
153            network_manager.set_cache_enabled(false);
154        }
155
156        if !config.service_worker_enabled {
157            network_manager.set_service_worker_enabled(true);
158        }
159
160        network_manager.set_request_interception(config.request_intercept);
161        network_manager.max_bytes_allowed = config.max_bytes_allowed;
162
163        if let Some(ref headers) = config.extra_headers {
164            network_manager.set_extra_headers(headers.clone());
165        }
166
167        network_manager.ignore_visuals = config.ignore_visuals;
168        network_manager.block_javascript = config.ignore_javascript;
169        network_manager.block_analytics = config.ignore_analytics;
170        network_manager.block_stylesheets = config.ignore_stylesheets;
171        network_manager.only_html = config.only_html;
172        network_manager.intercept_manager = config.intercept_manager;
173
174        Self {
175            info,
176            r#type: ty,
177            config,
178            frame_manager: FrameManager::new(request_timeout),
179            network_manager,
180            emulation_manager: EmulationManager::new(request_timeout),
181            session_id: None,
182            page: None,
183            init_state: TargetInit::AttachToTarget,
184            wait_for_frame_navigation: Default::default(),
185            wait_for_network_idle: Default::default(),
186            wait_for_network_almost_idle: Default::default(),
187            queued_events: Default::default(),
188            event_listeners: Default::default(),
189            initiator: None,
190            browser_context,
191        }
192    }
193
194    /// Set the session id.
195    pub fn set_session_id(&mut self, id: SessionId) {
196        self.session_id = Some(id)
197    }
198
199    /// Get the session id.
200    pub fn session_id(&self) -> Option<&SessionId> {
201        self.session_id.as_ref()
202    }
203
204    /// Get the session id mut.
205    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
206        &mut self.session_id
207    }
208
209    /// Get the browser context.
210    pub fn browser_context(&self) -> &BrowserContext {
211        &self.browser_context
212    }
213
214    /// The identifier for this target
215    pub fn target_id(&self) -> &TargetId {
216        &self.info.target_id
217    }
218
219    /// The type of this target
220    pub fn r#type(&self) -> &TargetType {
221        &self.r#type
222    }
223
224    /// Whether this target is already initialized
225    pub fn is_initialized(&self) -> bool {
226        matches!(self.init_state, TargetInit::Initialized)
227    }
228
229    /// Navigate a frame
230    pub fn goto(&mut self, req: FrameRequestedNavigation) {
231        if self.network_manager.has_target_domain() {
232            self.network_manager.clear_target_domain();
233            let goto_url = req
234                .req
235                .params
236                .as_object()
237                .and_then(|o| o.get("url"))
238                .and_then(|v| v.as_str());
239
240            if let Some(url) = goto_url {
241                self.network_manager.set_page_url(url.into());
242            }
243        }
244        self.frame_manager.goto(req);
245    }
246
247    /// Create a new page from the session.
248    fn create_page(&mut self) {
249        if self.page.is_none() {
250            if let Some(session) = self.session_id.clone() {
251                let handle =
252                    PageHandle::new(self.target_id().clone(), session, self.opener_id().cloned());
253                self.page = Some(handle);
254            }
255        }
256    }
257
258    /// Tries to create the `PageInner` if this target is already initialized
259    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
260        self.create_page();
261        self.page.as_ref().map(|p| p.inner())
262    }
263
264    /// Is the target a page?
265    pub fn is_page(&self) -> bool {
266        self.r#type().is_page()
267    }
268
269    /// The browser context ID.
270    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
271        self.info.browser_context_id.as_ref()
272    }
273
274    /// The target connection info.
275    pub fn info(&self) -> &TargetInfo {
276        &self.info
277    }
278
279    /// Get the target that opened this target. Top-level targets return `None`.
280    pub fn opener_id(&self) -> Option<&TargetId> {
281        self.info.opener_id.as_ref()
282    }
283
284    pub fn frame_manager(&self) -> &FrameManager {
285        &self.frame_manager
286    }
287
288    /// The frame manager.
289    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
290        &mut self.frame_manager
291    }
292
293    /// Get event listeners mutably.
294    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
295        &mut self.event_listeners
296    }
297
298    /// Received a response to a command issued by this target
299    pub fn on_response(&mut self, resp: Response, method: &str) {
300        if let Some(cmds) = self.init_state.commands_mut() {
301            cmds.received_response(method);
302        }
303
304        if let GetFrameTreeParams::IDENTIFIER = method {
305            if let Some(resp) = resp
306                .result
307                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
308            {
309                self.frame_manager.on_frame_tree(resp.frame_tree);
310            }
311        }
312        // requests originated from the network manager all return an empty response, hence they
313        // can be ignored here
314    }
315
316    /// On CDP Event message.
317    pub fn on_event(&mut self, event: CdpEventMessage) {
318        let CdpEventMessage {
319            params,
320            method,
321            session_id,
322            ..
323        } = event;
324
325        let is_session_scoped = matches!(
326            params,
327            CdpEvent::FetchRequestPaused(_)
328                | CdpEvent::FetchAuthRequired(_)
329                | CdpEvent::NetworkRequestWillBeSent(_)
330                | CdpEvent::NetworkResponseReceived(_)
331                | CdpEvent::NetworkLoadingFinished(_)
332                | CdpEvent::NetworkLoadingFailed(_)
333                | CdpEvent::PageFrameAttached(_)
334                | CdpEvent::PageFrameDetached(_)
335                | CdpEvent::PageFrameNavigated(_)
336                | CdpEvent::PageNavigatedWithinDocument(_)
337                | CdpEvent::PageLifecycleEvent(_)
338                | CdpEvent::PageFrameStartedLoading(_)
339                | CdpEvent::PageFrameStoppedLoading(_)
340                | CdpEvent::RuntimeExecutionContextCreated(_)
341                | CdpEvent::RuntimeExecutionContextDestroyed(_)
342                | CdpEvent::RuntimeExecutionContextsCleared(_)
343                | CdpEvent::RuntimeBindingCalled(_)
344        );
345
346        if is_session_scoped {
347            let ev_sid: &str = match session_id.as_deref() {
348                Some(s) => s,
349                None => return,
350            };
351
352            let self_sid: &str = match self.session_id.as_ref() {
353                Some(sid) => sid.as_ref(),
354                None => return,
355            };
356
357            if self_sid != ev_sid {
358                return;
359            }
360        }
361
362        match &params {
363            // `FrameManager` events
364            CdpEvent::PageFrameAttached(ev) => self
365                .frame_manager
366                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
367            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
368            CdpEvent::PageFrameNavigated(ev) => {
369                self.frame_manager.on_frame_navigated(&ev.frame);
370            }
371            CdpEvent::PageNavigatedWithinDocument(ev) => {
372                self.frame_manager.on_frame_navigated_within_document(ev)
373            }
374            CdpEvent::RuntimeExecutionContextCreated(ev) => {
375                self.frame_manager.on_frame_execution_context_created(ev)
376            }
377            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
378                self.frame_manager.on_frame_execution_context_destroyed(ev)
379            }
380            CdpEvent::RuntimeExecutionContextsCleared(_) => {
381                self.frame_manager.on_execution_contexts_cleared()
382            }
383            CdpEvent::RuntimeBindingCalled(ev) => {
384                // TODO check if binding registered and payload is json
385                self.frame_manager.on_runtime_binding_called(ev)
386            }
387            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
388            CdpEvent::PageFrameStartedLoading(ev) => {
389                self.frame_manager.on_frame_started_loading(ev);
390            }
391            CdpEvent::PageFrameStoppedLoading(ev) => {
392                self.frame_manager.on_frame_stopped_loading(ev);
393            }
394            // `Target` events
395            CdpEvent::TargetAttachedToTarget(ev) => {
396                if ev.waiting_for_debugger {
397                    let runtime_cmd = ATTACH_TARGET.clone();
398
399                    self.queued_events.push_back(TargetEvent::Request(Request {
400                        method: runtime_cmd.0,
401                        session_id: Some(ev.session_id.clone().into()),
402                        params: runtime_cmd.1,
403                    }));
404                }
405
406                if "service_worker" == &ev.target_info.r#type {
407                    let detach_command = DetachFromTargetParams::builder()
408                        .session_id(ev.session_id.clone())
409                        .build();
410
411                    let method = detach_command.identifier();
412
413                    if let Ok(params) = serde_json::to_value(detach_command) {
414                        self.queued_events.push_back(TargetEvent::Request(Request {
415                            method,
416                            session_id: self.session_id.clone().map(Into::into),
417                            params,
418                        }));
419                    }
420                }
421            }
422            // `NetworkManager` events
423            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
424            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
425            CdpEvent::NetworkRequestWillBeSent(ev) => {
426                self.network_manager.on_request_will_be_sent(ev)
427            }
428            CdpEvent::NetworkRequestServedFromCache(ev) => {
429                self.network_manager.on_request_served_from_cache(ev)
430            }
431            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
432            CdpEvent::NetworkLoadingFinished(ev) => {
433                self.network_manager.on_network_loading_finished(ev)
434            }
435            CdpEvent::NetworkLoadingFailed(ev) => {
436                self.network_manager.on_network_loading_failed(ev)
437            }
438            _ => (),
439        }
440        chromiumoxide_cdp::consume_event!(match params {
441           |ev| self.event_listeners.start_send(ev),
442           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
443        });
444    }
445
446    /// Called when a init command timed out
447    fn on_initialization_failed(&mut self) -> TargetEvent {
448        if let Some(initiator) = self.initiator.take() {
449            let _ = initiator.send(Err(CdpError::Timeout));
450        }
451        self.init_state = TargetInit::Closing;
452        let close_target = CloseTargetParams::new(self.info.target_id.clone());
453
454        TargetEvent::Request(Request {
455            method: close_target.identifier(),
456            session_id: self.session_id.clone().map(Into::into),
457            params: serde_json::to_value(close_target).unwrap_or_default(),
458        })
459    }
460
461    /// Advance that target's state
462    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
463        if !self.is_page() {
464            // can only poll pages
465            return None;
466        }
467
468        match &mut self.init_state {
469            TargetInit::AttachToTarget => {
470                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
471                    self.config.request_timeout,
472                ));
473
474                if let Ok(params) = AttachToTargetParams::builder()
475                    .target_id(self.target_id().clone())
476                    .flatten(true)
477                    .build()
478                {
479                    return Some(TargetEvent::Request(Request::new(
480                        params.identifier(),
481                        serde_json::to_value(params).unwrap_or_default(),
482                    )));
483                } else {
484                    return None;
485                }
486            }
487            TargetInit::InitializingFrame(cmds) => {
488                self.session_id.as_ref()?;
489                if let Poll::Ready(poll) = cmds.poll(now) {
490                    return match poll {
491                        None => {
492                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
493                                let world_name = world_name.clone();
494
495                                if let Some(isolated_world_cmds) =
496                                    self.frame_manager.ensure_isolated_world(&world_name)
497                                {
498                                    *cmds = isolated_world_cmds;
499                                } else {
500                                    self.init_state = TargetInit::InitializingNetwork(
501                                        self.network_manager.init_commands(),
502                                    );
503                                }
504                            } else {
505                                self.init_state = TargetInit::InitializingNetwork(
506                                    self.network_manager.init_commands(),
507                                );
508                            }
509                            self.poll(cx, now)
510                        }
511                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
512                            method,
513                            session_id: self.session_id.clone().map(Into::into),
514                            params,
515                        })),
516                        Some(Err(_)) => Some(self.on_initialization_failed()),
517                    };
518                } else {
519                    return None;
520                }
521            }
522            TargetInit::InitializingNetwork(cmds) => {
523                advance_state!(
524                    self,
525                    cx,
526                    now,
527                    cmds,
528                    TargetInit::InitializingPage(Self::page_init_commands(
529                        self.config.request_timeout
530                    ))
531                );
532            }
533            TargetInit::InitializingPage(cmds) => {
534                advance_state!(
535                    self,
536                    cx,
537                    now,
538                    cmds,
539                    match self.config.viewport.as_ref() {
540                        Some(viewport) => TargetInit::InitializingEmulation(
541                            self.emulation_manager.init_commands(viewport)
542                        ),
543                        None => TargetInit::Initialized,
544                    }
545                );
546            }
547            TargetInit::InitializingEmulation(cmds) => {
548                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
549            }
550            TargetInit::Initialized => {
551                if let Some(initiator) = self.initiator.take() {
552                    // make sure that the main frame of the page has finished loading
553                    if self
554                        .frame_manager
555                        .main_frame()
556                        .map(|frame| frame.is_loaded())
557                        .unwrap_or_default()
558                    {
559                        if let Some(page) = self.get_or_create_page() {
560                            let _ = initiator.send(Ok(page.clone().into()));
561                        } else {
562                            self.initiator = Some(initiator);
563                        }
564                    } else {
565                        self.initiator = Some(initiator);
566                    }
567                }
568            }
569            TargetInit::Closing => return None,
570        };
571
572        loop {
573            if self.init_state == TargetInit::Closing {
574                break None;
575            }
576
577            if let Some(frame) = self.frame_manager.main_frame() {
578                if frame.is_loaded() {
579                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
580                        let _ = tx.send(frame.http_request().cloned());
581                    }
582                }
583
584                if frame.is_network_idle() {
585                    while let Some(tx) = self.wait_for_network_idle.pop() {
586                        let _ = tx.send(frame.http_request().cloned());
587                    }
588                }
589
590                if frame.is_network_almost_idle() {
591                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
592                        let _ = tx.send(frame.http_request().cloned());
593                    }
594                }
595            }
596
597            // Drain queued messages first.
598            if let Some(ev) = self.queued_events.pop_front() {
599                return Some(ev);
600            }
601
602            if let Some(handle) = self.page.as_mut() {
603                while let Poll::Ready(Some(msg)) = Pin::new(&mut handle.rx).poll_next(cx) {
604                    if self.init_state == TargetInit::Closing {
605                        break;
606                    }
607
608                    match msg {
609                        TargetMessage::Command(cmd) => {
610                            if cmd.method == "Network.setBlockedURLs" {
611                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
612                                {
613                                    let mut unblock_all = false;
614                                    let mut block_all = false;
615
616                                    for s in arr.iter().filter_map(|v| v.as_str()) {
617                                        if s == "!*" {
618                                            unblock_all = true;
619                                            break; // "!*" overrides any block rules
620                                        }
621                                        if s.contains('*') {
622                                            block_all = true;
623                                        }
624                                    }
625
626                                    if unblock_all {
627                                        self.network_manager.set_block_all(false);
628                                    } else if block_all {
629                                        self.network_manager.set_block_all(true);
630                                    }
631                                }
632                            }
633                            self.queued_events.push_back(TargetEvent::Command(cmd));
634                        }
635                        TargetMessage::MainFrame(tx) => {
636                            let _ =
637                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
638                        }
639                        TargetMessage::AllFrames(tx) => {
640                            let _ = tx.send(
641                                self.frame_manager
642                                    .frames()
643                                    .map(|f| f.id().clone())
644                                    .collect(),
645                            );
646                        }
647                        #[cfg(feature = "_cache")]
648                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
649                            self.network_manager.set_cache_site_key(cache_key);
650                            self.network_manager.set_cache_policy(cache_policy);
651                        }
652                        TargetMessage::Url(req) => {
653                            let GetUrl { frame_id, tx } = req;
654                            let frame = if let Some(frame_id) = frame_id {
655                                self.frame_manager.frame(&frame_id)
656                            } else {
657                                self.frame_manager.main_frame()
658                            };
659                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
660                        }
661                        TargetMessage::Name(req) => {
662                            let GetName { frame_id, tx } = req;
663                            let frame = if let Some(frame_id) = frame_id {
664                                self.frame_manager.frame(&frame_id)
665                            } else {
666                                self.frame_manager.main_frame()
667                            };
668                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
669                        }
670                        TargetMessage::Parent(req) => {
671                            let GetParent { frame_id, tx } = req;
672                            let frame = self.frame_manager.frame(&frame_id);
673                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
674                        }
675                        TargetMessage::WaitForNavigation(tx) => {
676                            if let Some(frame) = self.frame_manager.main_frame() {
677                                // TODO submit a navigation watcher: waitForFrameNavigation
678
679                                // TODO return the watchers navigationResponse
680                                if frame.is_loaded() {
681                                    let _ = tx.send(frame.http_request().cloned());
682                                } else {
683                                    self.wait_for_frame_navigation.push(tx);
684                                }
685                            } else {
686                                self.wait_for_frame_navigation.push(tx);
687                            }
688                        }
689                        TargetMessage::WaitForNetworkIdle(tx) => {
690                            if let Some(frame) = self.frame_manager.main_frame() {
691                                if frame.is_network_idle() {
692                                    let _ = tx.send(frame.http_request().cloned());
693                                } else {
694                                    self.wait_for_network_idle.push(tx);
695                                }
696                            } else {
697                                self.wait_for_network_idle.push(tx);
698                            }
699                        }
700                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
701                            if let Some(frame) = self.frame_manager.main_frame() {
702                                if frame.is_network_almost_idle() {
703                                    let _ = tx.send(frame.http_request().cloned());
704                                } else {
705                                    self.wait_for_network_almost_idle.push(tx);
706                                }
707                            } else {
708                                self.wait_for_network_almost_idle.push(tx);
709                            }
710                        }
711                        TargetMessage::AddEventListener(req) => {
712                            if req.method == "Fetch.requestPaused" {
713                                self.network_manager.enable_request_intercept();
714                            }
715                            // register a new listener
716                            self.event_listeners.add_listener(req);
717                        }
718                        TargetMessage::GetExecutionContext(ctx) => {
719                            let GetExecutionContext {
720                                dom_world,
721                                frame_id,
722                                tx,
723                            } = ctx;
724                            let frame = if let Some(frame_id) = frame_id {
725                                self.frame_manager.frame(&frame_id)
726                            } else {
727                                self.frame_manager.main_frame()
728                            };
729
730                            if let Some(frame) = frame {
731                                match dom_world {
732                                    DOMWorldKind::Main => {
733                                        let _ = tx.send(frame.main_world().execution_context());
734                                    }
735                                    DOMWorldKind::Secondary => {
736                                        let _ =
737                                            tx.send(frame.secondary_world().execution_context());
738                                    }
739                                }
740                            } else {
741                                let _ = tx.send(None);
742                            }
743                        }
744                        TargetMessage::Authenticate(credentials) => {
745                            self.network_manager.authenticate(credentials);
746                        }
747                    }
748                }
749            }
750
751            while let Some(event) = self.network_manager.poll() {
752                if self.init_state == TargetInit::Closing {
753                    break;
754                }
755                match event {
756                    NetworkEvent::SendCdpRequest((method, params)) => {
757                        // send a message to the browser
758                        self.queued_events.push_back(TargetEvent::Request(Request {
759                            method,
760                            session_id: self.session_id.clone().map(Into::into),
761                            params,
762                        }))
763                    }
764                    NetworkEvent::Request(_) => {}
765                    NetworkEvent::Response(_) => {}
766                    NetworkEvent::RequestFailed(request) => {
767                        self.frame_manager.on_http_request_finished(request);
768                    }
769                    NetworkEvent::RequestFinished(request) => {
770                        self.frame_manager.on_http_request_finished(request);
771                    }
772                    NetworkEvent::BytesConsumed(n) => {
773                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
774                    }
775                }
776            }
777
778            while let Some(event) = self.frame_manager.poll(now) {
779                if self.init_state == TargetInit::Closing {
780                    break;
781                }
782                match event {
783                    FrameEvent::NavigationResult(res) => {
784                        self.queued_events
785                            .push_back(TargetEvent::NavigationResult(res));
786                    }
787                    FrameEvent::NavigationRequest(id, req) => {
788                        self.queued_events
789                            .push_back(TargetEvent::NavigationRequest(id, req));
790                    }
791                }
792            }
793
794            if self.queued_events.is_empty() {
795                return None;
796            }
797        }
798    }
799
800    /// Set the sender half of the channel who requested the creation of this
801    /// target
802    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
803        self.initiator = Some(tx);
804    }
805
806    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
807        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
808    }
809}
810
/// Configuration for how a single target/page should be fetched and processed.
///
/// The `Default` implementation of this type supplies the per-field defaults
/// noted below.
#[derive(Debug, Clone)]
pub struct TargetConfig {
    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
    /// When `true`, connections will proceed even if certificate validation fails.
    /// Defaults to `true`.
    pub ignore_https_errors: bool,
    /// Request timeout to use for the main navigation / resource fetch.
    /// This is the total time allowed before a request is considered failed.
    /// Defaults to `REQUEST_TIMEOUT` seconds.
    pub request_timeout: Duration,
    /// Optional browser viewport to use for this target.
    /// When `None` (the default), the default viewport (or headless browser
    /// default) is used.
    pub viewport: Option<Viewport>,
    /// Enable request interception for this target.
    /// When `true`, all network requests will pass through the intercept manager.
    /// Defaults to `false`.
    pub request_intercept: bool,
    /// Enable caching for this target.
    /// When `true`, responses may be read from and written to the cache layer.
    /// Defaults to `true`.
    pub cache_enabled: bool,
    /// If `true`, skip visual/asset resources that are not required for HTML content
    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
    /// Defaults to `false`.
    pub ignore_visuals: bool,
    /// If `true`, block JavaScript execution (or avoid loading JS resources)
    /// for this target. This is useful for purely static HTML crawls.
    /// Defaults to `false`.
    pub ignore_javascript: bool,
    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
    /// common tracker domains, etc.).
    /// Defaults to `true`.
    pub ignore_analytics: bool,
    /// If `true`, block stylesheets and related CSS resources for this target.
    /// This can reduce bandwidth when only raw HTML is needed.
    /// Defaults to `false`.
    pub ignore_stylesheets: bool,
    /// If `true`, only HTML documents will be fetched/kept.
    /// Non-HTML subresources may be skipped entirely.
    /// Defaults to `false`.
    pub only_html: bool,
    /// Whether service workers are allowed for this target.
    /// When `true`, service workers may register and intercept requests.
    /// Defaults to `true`.
    pub service_worker_enabled: bool,
    /// Extra HTTP headers to send with each request for this target.
    /// Keys should be header names, values their corresponding header values.
    /// Defaults to `None`.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Network intercept manager used to make allow/deny/modify decisions
    /// for requests when `request_intercept` is enabled.
    /// Defaults to `NetworkInterceptManager::Unknown`.
    pub intercept_manager: NetworkInterceptManager,
    /// The maximum number of response bytes allowed for this target.
    /// When set, responses larger than this limit may be truncated or aborted.
    /// Defaults to `None`.
    // NOTE(review): the enforcement of this limit is not visible here — confirm
    // the truncate/abort behavior against the network manager.
    pub max_bytes_allowed: Option<u64>,
}
857
858impl Default for TargetConfig {
859    fn default() -> Self {
860        Self {
861            ignore_https_errors: true,
862            request_timeout: Duration::from_secs(REQUEST_TIMEOUT),
863            viewport: Default::default(),
864            request_intercept: false,
865            cache_enabled: true,
866            service_worker_enabled: true,
867            ignore_javascript: false,
868            ignore_visuals: false,
869            ignore_stylesheets: false,
870            ignore_analytics: true,
871            only_html: false,
872            extra_headers: Default::default(),
873            intercept_manager: NetworkInterceptManager::Unknown,
874            max_bytes_allowed: None,
875        }
876    }
877}
878
/// Kind of a target, parsed from the type string the target reports.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Type string `"page"`.
    Page,
    /// Type string `"background_page"`.
    BackgroundPage,
    /// Type string `"service_worker"`.
    ServiceWorker,
    /// Type string `"shared_worker"`.
    SharedWorker,
    /// Type string `"other"`.
    Other,
    /// Type string `"browser"`.
    Browser,
    /// Type string `"webview"`.
    Webview,
    /// Any type string without a dedicated variant; the raw string is kept.
    Unknown(String),
}

impl TargetType {
    /// Map a target type string onto its variant.
    ///
    /// Matching is exact and case-sensitive. A string that is not recognized
    /// is preserved verbatim inside [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognized => Self::Unknown(unrecognized.to_owned()),
        }
    }

    /// `true` if this is [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        *self == Self::Page
    }

    /// `true` if this is [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        *self == Self::BackgroundPage
    }

    /// `true` if this is [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        *self == Self::ServiceWorker
    }

    /// `true` if this is [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        *self == Self::SharedWorker
    }

    /// `true` if this is [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        *self == Self::Other
    }

    /// `true` if this is [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        *self == Self::Browser
    }

    /// `true` if this is [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        *self == Self::Webview
    }
}
933
/// Events a target queues while it is polled.
#[derive(Debug)]
pub(crate) enum TargetEvent {
    /// An internal request (e.g. one the network manager asked to have sent
    /// to the browser)
    Request(Request),
    /// An internal navigation request, tagged with the id of the navigation
    NavigationRequest(NavigationId, Request),
    /// Indicates that a previous requested navigation has finished
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A new command arrived via a channel
    Command(CommandMessage),
    /// The bytes consumed by the network.
    BytesConsumed(u64),
}
947
// TODO this can be moved into the classes?
/// Initialization state of a target.
///
/// The `Initializing*` variants each carry the `CommandChain` of that stage;
/// the remaining variants carry no commands.
#[derive(Debug, PartialEq)]
pub enum TargetInit {
    /// Frame stage, with its pending command chain.
    InitializingFrame(CommandChain),
    /// Network stage, with its pending command chain.
    InitializingNetwork(CommandChain),
    /// Page stage, with its pending command chain.
    InitializingPage(CommandChain),
    /// Emulation stage, with its pending command chain.
    InitializingEmulation(CommandChain),
    /// NOTE(review): presumably the target still has to be attached to a
    /// session in this state — confirm against the polling code.
    AttachToTarget,
    /// NOTE(review): presumably all initialization stages are done — confirm.
    Initialized,
    /// The target is closing; polling of network and frame events stops once
    /// this state is observed.
    Closing,
}
959
960impl TargetInit {
961    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
962        match self {
963            TargetInit::InitializingFrame(cmd) => Some(cmd),
964            TargetInit::InitializingNetwork(cmd) => Some(cmd),
965            TargetInit::InitializingPage(cmd) => Some(cmd),
966            TargetInit::InitializingEmulation(cmd) => Some(cmd),
967            TargetInit::AttachToTarget => None,
968            TargetInit::Initialized => None,
969            TargetInit::Closing => None,
970        }
971    }
972}
973
/// Request for the execution context id of a frame's DOM world.
#[derive(Debug)]
pub struct GetExecutionContext {
    /// For which world the execution context was requested
    pub dom_world: DOMWorldKind,
    /// The id of the frame to get the `ExecutionContext` for
    /// (`None` = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<ExecutionContextId>>,
}
983
984impl GetExecutionContext {
985    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
986        Self {
987            dom_world: DOMWorldKind::Main,
988            frame_id: None,
989            tx,
990        }
991    }
992}
993
/// Request for the url of a frame.
#[derive(Debug)]
pub struct GetUrl {
    /// The id of the frame to get the url for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back;
    /// the response is `None` when no url is available
    pub tx: Sender<Option<String>>,
}
1001
1002impl GetUrl {
1003    pub fn new(tx: Sender<Option<String>>) -> Self {
1004        Self { frame_id: None, tx }
1005    }
1006}
1007
/// Request for the name of a frame.
#[derive(Debug)]
pub struct GetName {
    /// The id of the frame to get the name for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back;
    /// the response is `None` when no name is available
    pub tx: Sender<Option<String>>,
}
1015
/// Request for the parent of a frame.
#[derive(Debug)]
pub struct GetParent {
    /// The id of the frame to get the parent for. Unlike the other frame
    /// requests this id is mandatory; there is no main-frame fallback.
    pub frame_id: FrameId,
    /// Sender half of the channel to send the response back;
    /// the response is an `Option`, so a parent id may be absent
    pub tx: Sender<Option<FrameId>>,
}
1023
/// Messages a target handles while it is polled.
///
/// Variants that carry a `Sender` are answered through that channel.
#[derive(Debug)]
pub enum TargetMessage {
    /// Execute a command within the session of this target
    Command(CommandMessage),
    /// Return the main frame of this target's page
    MainFrame(Sender<Option<FrameId>>),
    /// Return all the frames of this target's page
    AllFrames(Sender<Vec<FrameId>>),
    #[cfg(feature = "_cache")]
    /// Set the cache key and policy for the target page.
    /// Only available when the `_cache` feature is enabled.
    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
    /// Return the url if available
    Url(GetUrl),
    /// Return the name if available
    Name(GetName),
    /// Return the parent id of a frame
    Parent(GetParent),
    /// A Message that resolves when the frame finished loading a new url
    WaitForNavigation(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is idle.
    /// Answered immediately if the main frame is already network idle,
    /// otherwise the sender is queued.
    WaitForNetworkIdle(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is almost idle.
    /// Answered immediately if the main frame is already almost idle,
    /// otherwise the sender is queued.
    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
    /// A request to submit a new listener that gets notified with every
    /// received event. Registering a listener for `Fetch.requestPaused`
    /// also enables request interception on the network manager.
    AddEventListener(EventListenerRequest),
    /// Get the `ExecutionContext` if available
    GetExecutionContext(GetExecutionContext),
    /// Hand the given credentials to the network manager's `authenticate`
    Authenticate(Credentials),
}