chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::pin::Pin;
3use std::sync::Arc;
4use std::time::Instant;
5
6use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
7use futures::channel::oneshot::Sender;
8use futures::stream::Stream;
9use futures::task::{Context, Poll};
10
11use crate::auth::Credentials;
12use crate::cdp::browser_protocol::target::CloseTargetParams;
13use crate::cmd::CommandChain;
14use crate::cmd::CommandMessage;
15use crate::error::{CdpError, Result};
16use crate::handler::browser::BrowserContext;
17use crate::handler::domworld::DOMWorldKind;
18use crate::handler::emulation::EmulationManager;
19use crate::handler::frame::FrameRequestedNavigation;
20use crate::handler::frame::{
21    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
22};
23use crate::handler::network::{NetworkEvent, NetworkManager};
24use crate::handler::page::PageHandle;
25use crate::handler::viewport::Viewport;
26use crate::handler::{PageInner, REQUEST_TIMEOUT};
27use crate::listeners::{EventListenerRequest, EventListeners};
28use crate::{page::Page, ArcHttpRequest};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    browser::BrowserContextId,
31    log as cdplog,
32    page::{FrameId, GetFrameTreeParams},
33    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
34};
35use chromiumoxide_cdp::cdp::events::CdpEvent;
36use chromiumoxide_cdp::cdp::js_protocol::runtime::{
37    ExecutionContextId, RunIfWaitingForDebuggerParams,
38};
39use chromiumoxide_cdp::cdp::CdpEventMessage;
40use chromiumoxide_types::{Command, Method, Request, Response};
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42use std::time::Duration;
43
/// Drives a single initialization stage from inside `Target::poll`.
///
/// Polls the stage's command chain `$cmds` at `$now` and always returns from
/// the enclosing function:
///
/// * chain still pending: returns `None`;
/// * chain exhausted: stores `$next_state` in `$s.init_state` and re-enters
///   `$s.poll($cx, $now)`;
/// * next command ready: returns it as a `TargetEvent::Request` carrying the
///   target's current session id;
/// * chain yielded an error: returns the event produced by
///   `$s.on_initialization_failed()`.
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds:ident, $next_state:expr) => {{
        return match $cmds.poll($now) {
            Poll::Pending => None,
            Poll::Ready(None) => {
                $s.init_state = $next_state;
                $s.poll($cx, $now)
            }
            Poll::Ready(Some(Ok((method, params)))) => {
                let session_id = $s.session_id.clone().map(Into::into);
                Some(TargetEvent::Request(Request {
                    method,
                    session_id,
                    params,
                }))
            }
            Poll::Ready(Some(Err(_))) => Some($s.on_initialization_failed()),
        };
    }};
}
64
65lazy_static::lazy_static! {
66    /// Initial start command params.
67    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
68        if let Ok(attach) = SetAutoAttachParams::builder()
69            .flatten(true)
70            .auto_attach(true)
71            .wait_for_debugger_on_start(true)
72            .build() {
73                let disable_log = cdplog::DisableParams::default();
74
75                let mut cmds =  vec![
76                    (
77                        attach.identifier(),
78                        serde_json::to_value(attach).unwrap_or_default(),
79                    ),
80                    (
81                        disable_log.identifier(),
82                        serde_json::to_value(disable_log).unwrap_or_default(),
83                    )
84                ];
85
86                // enable performance on pages.
87                if cfg!(feature = "collect_metrics") {
88                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
89                    cmds.push((
90                        enable_performance.identifier(),
91                        serde_json::to_value(enable_performance).unwrap_or_default(),
92                    ));
93                }
94
95                cmds
96            } else {
97                vec![]
98            }
99    };
100
101    /// Attach to target commands
102    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
103        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
104
105        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
106    };
107}
108
/// State kept by the handler for a single browser target: its metadata, the
/// session it is attached to, the managers that process its CDP events, and
/// the progress of its initialization.
#[derive(Debug)]
pub struct Target {
    /// Info about this target as returned from the chromium instance
    info: TargetInfo,
    /// The type of this target, derived from `info.type` at construction
    r#type: TargetType,
    /// Configs for this target
    config: TargetConfig,
    /// The context this target is running in
    browser_context: BrowserContext,
    /// The frame manager that maintains the state of all frames and handles
    /// navigations of frames
    frame_manager: FrameManager,
    /// Handles the `Fetch.*` and `Network.*` events of this target
    pub(crate) network_manager: NetworkManager,
    /// Provides the emulation init commands for the configured viewport
    emulation_manager: EmulationManager,
    /// The identifier of the session this target is attached to
    session_id: Option<SessionId>,
    /// The handle of the browser page of this target
    page: Option<PageHandle>,
    /// Drives this target towards initialization
    pub(crate) init_state: TargetInit,
    /// Currently queued events to report to the `Handler`
    queued_events: VecDeque<TargetEvent>,
    /// All registered event subscriptions
    event_listeners: EventListeners,
    /// Senders that need to be notified once the main frame has loaded
    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches `networkIdle`.
    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches `networkAlmostIdle`.
    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
    /// The sender who requested the page.
    initiator: Option<Sender<Result<Page>>>,
}
144
145impl Target {
146    /// Create a new target instance with `TargetInfo` after a
147    /// `CreateTargetParams` request.
148    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
149        let ty = TargetType::new(&info.r#type);
150        let request_timeout: Duration = config.request_timeout;
151        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
152
153        if !config.cache_enabled {
154            network_manager.set_cache_enabled(false);
155        }
156
157        if !config.service_worker_enabled {
158            network_manager.set_service_worker_enabled(true);
159        }
160
161        network_manager.set_request_interception(config.request_intercept);
162        network_manager.max_bytes_allowed = config.max_bytes_allowed;
163
164        if let Some(ref headers) = config.extra_headers {
165            network_manager.set_extra_headers(headers.clone());
166        }
167
168        network_manager.ignore_visuals = config.ignore_visuals;
169        network_manager.block_javascript = config.ignore_javascript;
170        network_manager.block_analytics = config.ignore_analytics;
171        network_manager.block_stylesheets = config.ignore_stylesheets;
172        network_manager.only_html = config.only_html;
173        network_manager.intercept_manager = config.intercept_manager;
174
175        Self {
176            info,
177            r#type: ty,
178            config,
179            frame_manager: FrameManager::new(request_timeout),
180            network_manager,
181            emulation_manager: EmulationManager::new(request_timeout),
182            session_id: None,
183            page: None,
184            init_state: TargetInit::AttachToTarget,
185            wait_for_frame_navigation: Default::default(),
186            wait_for_network_idle: Default::default(),
187            wait_for_network_almost_idle: Default::default(),
188            queued_events: Default::default(),
189            event_listeners: Default::default(),
190            initiator: None,
191            browser_context,
192        }
193    }
194
195    /// Set the session id.
196    pub fn set_session_id(&mut self, id: SessionId) {
197        self.session_id = Some(id)
198    }
199
200    /// Get the session id.
201    pub fn session_id(&self) -> Option<&SessionId> {
202        self.session_id.as_ref()
203    }
204
205    /// Get the session id mut.
206    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
207        &mut self.session_id
208    }
209
210    /// Get the browser context.
211    pub fn browser_context(&self) -> &BrowserContext {
212        &self.browser_context
213    }
214
215    /// The identifier for this target
216    pub fn target_id(&self) -> &TargetId {
217        &self.info.target_id
218    }
219
220    /// The type of this target
221    pub fn r#type(&self) -> &TargetType {
222        &self.r#type
223    }
224
225    /// Whether this target is already initialized
226    pub fn is_initialized(&self) -> bool {
227        matches!(self.init_state, TargetInit::Initialized)
228    }
229
230    /// Navigate a frame
231    pub fn goto(&mut self, req: FrameRequestedNavigation) {
232        if self.network_manager.has_target_domain() {
233            self.network_manager.clear_target_domain();
234            let goto_url = req
235                .req
236                .params
237                .as_object()
238                .and_then(|o| o.get("url"))
239                .and_then(|v| v.as_str());
240
241            if let Some(url) = goto_url {
242                self.network_manager.set_page_url(url.into());
243            }
244        }
245        self.frame_manager.goto(req);
246    }
247
248    /// Create a new page from the session.
249    fn create_page(&mut self) {
250        if self.page.is_none() {
251            if let Some(session) = self.session_id.clone() {
252                let handle =
253                    PageHandle::new(self.target_id().clone(), session, self.opener_id().cloned());
254                self.page = Some(handle);
255            }
256        }
257    }
258
259    /// Tries to create the `PageInner` if this target is already initialized
260    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
261        self.create_page();
262        self.page.as_ref().map(|p| p.inner())
263    }
264
265    /// Is the target a page?
266    pub fn is_page(&self) -> bool {
267        self.r#type().is_page()
268    }
269
270    /// The browser context ID.
271    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
272        self.info.browser_context_id.as_ref()
273    }
274
275    /// The target connection info.
276    pub fn info(&self) -> &TargetInfo {
277        &self.info
278    }
279
280    /// Get the target that opened this target. Top-level targets return `None`.
281    pub fn opener_id(&self) -> Option<&TargetId> {
282        self.info.opener_id.as_ref()
283    }
284
285    pub fn frame_manager(&self) -> &FrameManager {
286        &self.frame_manager
287    }
288
289    /// The frame manager.
290    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
291        &mut self.frame_manager
292    }
293
294    /// Get event listeners mutably.
295    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
296        &mut self.event_listeners
297    }
298
299    /// Received a response to a command issued by this target
300    pub fn on_response(&mut self, resp: Response, method: &str) {
301        if let Some(cmds) = self.init_state.commands_mut() {
302            cmds.received_response(method);
303        }
304
305        if let GetFrameTreeParams::IDENTIFIER = method {
306            if let Some(resp) = resp
307                .result
308                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
309            {
310                self.frame_manager.on_frame_tree(resp.frame_tree);
311            }
312        }
313        // requests originated from the network manager all return an empty response, hence they
314        // can be ignored here
315    }
316
317    /// On CDP Event message.
318    pub fn on_event(&mut self, event: CdpEventMessage) {
319        let CdpEventMessage {
320            params,
321            method,
322            session_id,
323            ..
324        } = event;
325
326        let is_session_scoped = matches!(
327            params,
328            CdpEvent::FetchRequestPaused(_)
329                | CdpEvent::FetchAuthRequired(_)
330                | CdpEvent::NetworkRequestWillBeSent(_)
331                | CdpEvent::NetworkResponseReceived(_)
332                | CdpEvent::NetworkLoadingFinished(_)
333                | CdpEvent::NetworkLoadingFailed(_)
334                | CdpEvent::PageFrameAttached(_)
335                | CdpEvent::PageFrameDetached(_)
336                | CdpEvent::PageFrameNavigated(_)
337                | CdpEvent::PageNavigatedWithinDocument(_)
338                | CdpEvent::PageLifecycleEvent(_)
339                | CdpEvent::PageFrameStartedLoading(_)
340                | CdpEvent::PageFrameStoppedLoading(_)
341                | CdpEvent::RuntimeExecutionContextCreated(_)
342                | CdpEvent::RuntimeExecutionContextDestroyed(_)
343                | CdpEvent::RuntimeExecutionContextsCleared(_)
344                | CdpEvent::RuntimeBindingCalled(_)
345        );
346
347        if is_session_scoped {
348            let ev_sid: &str = match session_id.as_deref() {
349                Some(s) => s,
350                None => return,
351            };
352
353            let self_sid: &str = match self.session_id.as_ref() {
354                Some(sid) => sid.as_ref(),
355                None => return,
356            };
357
358            if self_sid != ev_sid {
359                return;
360            }
361        }
362
363        match &params {
364            // `FrameManager` events
365            CdpEvent::PageFrameAttached(ev) => self
366                .frame_manager
367                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
368            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
369            CdpEvent::PageFrameNavigated(ev) => {
370                self.frame_manager.on_frame_navigated(&ev.frame);
371            }
372            CdpEvent::PageNavigatedWithinDocument(ev) => {
373                self.frame_manager.on_frame_navigated_within_document(ev)
374            }
375            CdpEvent::RuntimeExecutionContextCreated(ev) => {
376                self.frame_manager.on_frame_execution_context_created(ev)
377            }
378            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
379                self.frame_manager.on_frame_execution_context_destroyed(ev)
380            }
381            CdpEvent::RuntimeExecutionContextsCleared(_) => {
382                self.frame_manager.on_execution_contexts_cleared()
383            }
384            CdpEvent::RuntimeBindingCalled(ev) => {
385                // TODO check if binding registered and payload is json
386                self.frame_manager.on_runtime_binding_called(ev)
387            }
388            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
389            CdpEvent::PageFrameStartedLoading(ev) => {
390                self.frame_manager.on_frame_started_loading(ev);
391            }
392            CdpEvent::PageFrameStoppedLoading(ev) => {
393                self.frame_manager.on_frame_stopped_loading(ev);
394            }
395            // `Target` events
396            CdpEvent::TargetAttachedToTarget(ev) => {
397                if ev.waiting_for_debugger {
398                    let runtime_cmd = ATTACH_TARGET.clone();
399
400                    self.queued_events.push_back(TargetEvent::Request(Request {
401                        method: runtime_cmd.0,
402                        session_id: Some(ev.session_id.clone().into()),
403                        params: runtime_cmd.1,
404                    }));
405                }
406
407                if "service_worker" == &ev.target_info.r#type {
408                    let detach_command = DetachFromTargetParams::builder()
409                        .session_id(ev.session_id.clone())
410                        .build();
411
412                    let method = detach_command.identifier();
413
414                    if let Ok(params) = serde_json::to_value(detach_command) {
415                        self.queued_events.push_back(TargetEvent::Request(Request {
416                            method,
417                            session_id: self.session_id.clone().map(Into::into),
418                            params,
419                        }));
420                    }
421                }
422            }
423            // `NetworkManager` events
424            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
425            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
426            CdpEvent::NetworkRequestWillBeSent(ev) => {
427                self.network_manager.on_request_will_be_sent(ev)
428            }
429            CdpEvent::NetworkRequestServedFromCache(ev) => {
430                self.network_manager.on_request_served_from_cache(ev)
431            }
432            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
433            CdpEvent::NetworkLoadingFinished(ev) => {
434                self.network_manager.on_network_loading_finished(ev)
435            }
436            CdpEvent::NetworkLoadingFailed(ev) => {
437                self.network_manager.on_network_loading_failed(ev)
438            }
439            _ => (),
440        }
441        chromiumoxide_cdp::consume_event!(match params {
442           |ev| self.event_listeners.start_send(ev),
443           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
444        });
445    }
446
447    /// Called when a init command timed out
448    fn on_initialization_failed(&mut self) -> TargetEvent {
449        if let Some(initiator) = self.initiator.take() {
450            let _ = initiator.send(Err(CdpError::Timeout));
451        }
452        self.init_state = TargetInit::Closing;
453        let close_target = CloseTargetParams::new(self.info.target_id.clone());
454
455        TargetEvent::Request(Request {
456            method: close_target.identifier(),
457            session_id: self.session_id.clone().map(Into::into),
458            params: serde_json::to_value(close_target).unwrap_or_default(),
459        })
460    }
461
462    /// Advance that target's state
463    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
464        if !self.is_page() {
465            // can only poll pages
466            return None;
467        }
468
469        match &mut self.init_state {
470            TargetInit::AttachToTarget => {
471                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
472                    self.config.request_timeout,
473                ));
474
475                if let Ok(params) = AttachToTargetParams::builder()
476                    .target_id(self.target_id().clone())
477                    .flatten(true)
478                    .build()
479                {
480                    return Some(TargetEvent::Request(Request::new(
481                        params.identifier(),
482                        serde_json::to_value(params).unwrap_or_default(),
483                    )));
484                } else {
485                    return None;
486                }
487            }
488            TargetInit::InitializingFrame(cmds) => {
489                self.session_id.as_ref()?;
490                if let Poll::Ready(poll) = cmds.poll(now) {
491                    return match poll {
492                        None => {
493                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
494                                let world_name = world_name.clone();
495
496                                if let Some(isolated_world_cmds) =
497                                    self.frame_manager.ensure_isolated_world(&world_name)
498                                {
499                                    *cmds = isolated_world_cmds;
500                                } else {
501                                    self.init_state = TargetInit::InitializingNetwork(
502                                        self.network_manager.init_commands(),
503                                    );
504                                }
505                            } else {
506                                self.init_state = TargetInit::InitializingNetwork(
507                                    self.network_manager.init_commands(),
508                                );
509                            }
510                            self.poll(cx, now)
511                        }
512                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
513                            method,
514                            session_id: self.session_id.clone().map(Into::into),
515                            params,
516                        })),
517                        Some(Err(_)) => Some(self.on_initialization_failed()),
518                    };
519                } else {
520                    return None;
521                }
522            }
523            TargetInit::InitializingNetwork(cmds) => {
524                advance_state!(
525                    self,
526                    cx,
527                    now,
528                    cmds,
529                    TargetInit::InitializingPage(Self::page_init_commands(
530                        self.config.request_timeout
531                    ))
532                );
533            }
534            TargetInit::InitializingPage(cmds) => {
535                advance_state!(
536                    self,
537                    cx,
538                    now,
539                    cmds,
540                    match self.config.viewport.as_ref() {
541                        Some(viewport) => TargetInit::InitializingEmulation(
542                            self.emulation_manager.init_commands(viewport)
543                        ),
544                        None => TargetInit::Initialized,
545                    }
546                );
547            }
548            TargetInit::InitializingEmulation(cmds) => {
549                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
550            }
551            TargetInit::Initialized => {
552                if let Some(initiator) = self.initiator.take() {
553                    // make sure that the main frame of the page has finished loading
554                    if self
555                        .frame_manager
556                        .main_frame()
557                        .map(|frame| frame.is_loaded())
558                        .unwrap_or_default()
559                    {
560                        if let Some(page) = self.get_or_create_page() {
561                            let _ = initiator.send(Ok(page.clone().into()));
562                        } else {
563                            self.initiator = Some(initiator);
564                        }
565                    } else {
566                        self.initiator = Some(initiator);
567                    }
568                }
569            }
570            TargetInit::Closing => return None,
571        };
572
573        loop {
574            if self.init_state == TargetInit::Closing {
575                break None;
576            }
577
578            if let Some(frame) = self.frame_manager.main_frame() {
579                if frame.is_loaded() {
580                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
581                        let _ = tx.send(frame.http_request().cloned());
582                    }
583                }
584
585                if frame.is_network_idle() {
586                    while let Some(tx) = self.wait_for_network_idle.pop() {
587                        let _ = tx.send(frame.http_request().cloned());
588                    }
589                }
590
591                if frame.is_network_almost_idle() {
592                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
593                        let _ = tx.send(frame.http_request().cloned());
594                    }
595                }
596            }
597
598            // Drain queued messages first.
599            if let Some(ev) = self.queued_events.pop_front() {
600                return Some(ev);
601            }
602
603            if let Some(handle) = self.page.as_mut() {
604                while let Poll::Ready(Some(msg)) = Pin::new(&mut handle.rx).poll_next(cx) {
605                    if self.init_state == TargetInit::Closing {
606                        break;
607                    }
608
609                    match msg {
610                        TargetMessage::Command(cmd) => {
611                            if cmd.method == "Network.setBlockedURLs" {
612                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
613                                {
614                                    let mut unblock_all = false;
615                                    let mut block_all = false;
616
617                                    for s in arr.iter().filter_map(|v| v.as_str()) {
618                                        if s == "!*" {
619                                            unblock_all = true;
620                                            break; // "!*" overrides any block rules
621                                        }
622                                        if s.contains('*') {
623                                            block_all = true;
624                                        }
625                                    }
626
627                                    if unblock_all {
628                                        self.network_manager.set_block_all(false);
629                                    } else if block_all {
630                                        self.network_manager.set_block_all(true);
631                                    }
632                                }
633                            }
634                            self.queued_events.push_back(TargetEvent::Command(cmd));
635                        }
636                        TargetMessage::MainFrame(tx) => {
637                            let _ =
638                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
639                        }
640                        TargetMessage::AllFrames(tx) => {
641                            let _ = tx.send(
642                                self.frame_manager
643                                    .frames()
644                                    .map(|f| f.id().clone())
645                                    .collect(),
646                            );
647                        }
648                        #[cfg(feature = "_cache")]
649                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
650                            self.network_manager.set_cache_site_key(cache_key);
651                            self.network_manager.set_cache_policy(cache_policy);
652                        }
653                        TargetMessage::Url(req) => {
654                            let GetUrl { frame_id, tx } = req;
655                            let frame = if let Some(frame_id) = frame_id {
656                                self.frame_manager.frame(&frame_id)
657                            } else {
658                                self.frame_manager.main_frame()
659                            };
660                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
661                        }
662                        TargetMessage::Name(req) => {
663                            let GetName { frame_id, tx } = req;
664                            let frame = if let Some(frame_id) = frame_id {
665                                self.frame_manager.frame(&frame_id)
666                            } else {
667                                self.frame_manager.main_frame()
668                            };
669                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
670                        }
671                        TargetMessage::Parent(req) => {
672                            let GetParent { frame_id, tx } = req;
673                            let frame = self.frame_manager.frame(&frame_id);
674                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
675                        }
676                        TargetMessage::WaitForNavigation(tx) => {
677                            if let Some(frame) = self.frame_manager.main_frame() {
678                                // TODO submit a navigation watcher: waitForFrameNavigation
679
680                                // TODO return the watchers navigationResponse
681                                if frame.is_loaded() {
682                                    let _ = tx.send(frame.http_request().cloned());
683                                } else {
684                                    self.wait_for_frame_navigation.push(tx);
685                                }
686                            } else {
687                                self.wait_for_frame_navigation.push(tx);
688                            }
689                        }
690                        TargetMessage::WaitForNetworkIdle(tx) => {
691                            if let Some(frame) = self.frame_manager.main_frame() {
692                                if frame.is_network_idle() {
693                                    let _ = tx.send(frame.http_request().cloned());
694                                } else {
695                                    self.wait_for_network_idle.push(tx);
696                                }
697                            } else {
698                                self.wait_for_network_idle.push(tx);
699                            }
700                        }
701                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
702                            if let Some(frame) = self.frame_manager.main_frame() {
703                                if frame.is_network_almost_idle() {
704                                    let _ = tx.send(frame.http_request().cloned());
705                                } else {
706                                    self.wait_for_network_almost_idle.push(tx);
707                                }
708                            } else {
709                                self.wait_for_network_almost_idle.push(tx);
710                            }
711                        }
712                        TargetMessage::AddEventListener(req) => {
713                            if req.method == "Fetch.requestPaused" {
714                                self.network_manager.enable_request_intercept();
715                            }
716                            // register a new listener
717                            self.event_listeners.add_listener(req);
718                        }
719                        TargetMessage::GetExecutionContext(ctx) => {
720                            let GetExecutionContext {
721                                dom_world,
722                                frame_id,
723                                tx,
724                            } = ctx;
725                            let frame = if let Some(frame_id) = frame_id {
726                                self.frame_manager.frame(&frame_id)
727                            } else {
728                                self.frame_manager.main_frame()
729                            };
730
731                            if let Some(frame) = frame {
732                                match dom_world {
733                                    DOMWorldKind::Main => {
734                                        let _ = tx.send(frame.main_world().execution_context());
735                                    }
736                                    DOMWorldKind::Secondary => {
737                                        let _ =
738                                            tx.send(frame.secondary_world().execution_context());
739                                    }
740                                }
741                            } else {
742                                let _ = tx.send(None);
743                            }
744                        }
745                        TargetMessage::Authenticate(credentials) => {
746                            self.network_manager.authenticate(credentials);
747                        }
748                    }
749                }
750            }
751
752            while let Some(event) = self.network_manager.poll() {
753                if self.init_state == TargetInit::Closing {
754                    break;
755                }
756                match event {
757                    NetworkEvent::SendCdpRequest((method, params)) => {
758                        // send a message to the browser
759                        self.queued_events.push_back(TargetEvent::Request(Request {
760                            method,
761                            session_id: self.session_id.clone().map(Into::into),
762                            params,
763                        }))
764                    }
765                    NetworkEvent::Request(_) => {}
766                    NetworkEvent::Response(_) => {}
767                    NetworkEvent::RequestFailed(request) => {
768                        self.frame_manager.on_http_request_finished(request);
769                    }
770                    NetworkEvent::RequestFinished(request) => {
771                        self.frame_manager.on_http_request_finished(request);
772                    }
773                    NetworkEvent::BytesConsumed(n) => {
774                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
775                    }
776                }
777            }
778
779            while let Some(event) = self.frame_manager.poll(now) {
780                if self.init_state == TargetInit::Closing {
781                    break;
782                }
783                match event {
784                    FrameEvent::NavigationResult(res) => {
785                        self.queued_events
786                            .push_back(TargetEvent::NavigationResult(res));
787                    }
788                    FrameEvent::NavigationRequest(id, req) => {
789                        self.queued_events
790                            .push_back(TargetEvent::NavigationRequest(id, req));
791                    }
792                }
793            }
794
795            if self.queued_events.is_empty() {
796                return None;
797            }
798        }
799    }
800
801    /// Set the sender half of the channel who requested the creation of this
802    /// target
803    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
804        self.initiator = Some(tx);
805    }
806
807    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
808        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
809    }
810}
811
812/// Configuration for how a single target/page should be fetched and processed.
813#[derive(Debug, Clone)]
814pub struct TargetConfig {
815    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
816    /// When `true`, connections will proceed even if certificate validation fails.
817    pub ignore_https_errors: bool,
818    /// Request timeout to use for the main navigation / resource fetch.
819    /// This is the total time allowed before a request is considered failed.
820    pub request_timeout: Duration,
821    /// Optional browser viewport to use for this target.
822    /// When `None`, the default viewport (or headless browser default) is used.
823    pub viewport: Option<Viewport>,
824    /// Enable request interception for this target.
825    /// When `true`, all network requests will pass through the intercept manager.
826    pub request_intercept: bool,
827    /// Enable caching for this target.
828    /// When `true`, responses may be read from and written to the cache layer.
829    pub cache_enabled: bool,
830    /// If `true`, skip visual/asset resources that are not required for HTML content
831    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
832    pub ignore_visuals: bool,
833    /// If `true`, block JavaScript execution (or avoid loading JS resources)
834    /// for this target. This is useful for purely static HTML crawls.
835    pub ignore_javascript: bool,
836    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
837    /// common tracker domains, etc.).
838    pub ignore_analytics: bool,
839    /// If `true`, block stylesheets and related CSS resources for this target.
840    /// This can reduce bandwidth when only raw HTML is needed.
841    pub ignore_stylesheets: bool,
842    /// If `true`, only HTML documents will be fetched/kept.
843    /// Non-HTML subresources may be skipped entirely.
844    pub only_html: bool,
845    /// Whether service workers are allowed for this target.
846    /// When `true`, service workers may register and intercept requests.
847    pub service_worker_enabled: bool,
848    /// Extra HTTP headers to send with each request for this target.
849    /// Keys should be header names, values their corresponding header values.
850    pub extra_headers: Option<std::collections::HashMap<String, String>>,
851    /// Network intercept manager used to make allow/deny/modify decisions
852    /// for requests when `request_intercept` is enabled.
853    pub intercept_manager: NetworkInterceptManager,
854    /// The maximum number of response bytes allowed for this target.
855    /// When set, responses larger than this limit may be truncated or aborted.
856    pub max_bytes_allowed: Option<u64>,
857}
858
859impl Default for TargetConfig {
860    fn default() -> Self {
861        Self {
862            ignore_https_errors: true,
863            request_timeout: Duration::from_secs(REQUEST_TIMEOUT),
864            viewport: Default::default(),
865            request_intercept: false,
866            cache_enabled: true,
867            service_worker_enabled: true,
868            ignore_javascript: false,
869            ignore_visuals: false,
870            ignore_stylesheets: false,
871            ignore_analytics: true,
872            only_html: false,
873            extra_headers: Default::default(),
874            intercept_manager: NetworkInterceptManager::Unknown,
875            max_bytes_allowed: None,
876        }
877    }
878}
879
/// The kind of a target, parsed from its type string.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TargetType {
    Page,
    BackgroundPage,
    ServiceWorker,
    SharedWorker,
    Other,
    Browser,
    Webview,
    /// Any type string without a dedicated variant; the string is kept as is.
    Unknown(String),
}

impl TargetType {
    /// Map a target type string onto its variant.
    ///
    /// Matching is exact (case-sensitive). Strings that are not recognised
    /// are preserved in [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognised => Self::Unknown(unrecognised.to_owned()),
        }
    }

    /// `true` if this is [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        *self == Self::Page
    }

    /// `true` if this is [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        *self == Self::BackgroundPage
    }

    /// `true` if this is [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        *self == Self::ServiceWorker
    }

    /// `true` if this is [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        *self == Self::SharedWorker
    }

    /// `true` if this is [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        *self == Self::Other
    }

    /// `true` if this is [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        *self == Self::Browser
    }

    /// `true` if this is [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        *self == Self::Webview
    }
}
934
935#[derive(Debug)]
936pub(crate) enum TargetEvent {
937    /// An internal request
938    Request(Request),
939    /// An internal navigation request
940    NavigationRequest(NavigationId, Request),
941    /// Indicates that a previous requested navigation has finished
942    NavigationResult(Result<NavigationOk, NavigationError>),
943    /// A new command arrived via a channel
944    Command(CommandMessage),
945    /// The bytes consumed by the network.
946    BytesConsumed(u64),
947}
948
949// TODO this can be moved into the classes?
950#[derive(Debug, PartialEq)]
951pub enum TargetInit {
952    InitializingFrame(CommandChain),
953    InitializingNetwork(CommandChain),
954    InitializingPage(CommandChain),
955    InitializingEmulation(CommandChain),
956    AttachToTarget,
957    Initialized,
958    Closing,
959}
960
961impl TargetInit {
962    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
963        match self {
964            TargetInit::InitializingFrame(cmd) => Some(cmd),
965            TargetInit::InitializingNetwork(cmd) => Some(cmd),
966            TargetInit::InitializingPage(cmd) => Some(cmd),
967            TargetInit::InitializingEmulation(cmd) => Some(cmd),
968            TargetInit::AttachToTarget => None,
969            TargetInit::Initialized => None,
970            TargetInit::Closing => None,
971        }
972    }
973}
974
975#[derive(Debug)]
976pub struct GetExecutionContext {
977    /// For which world the execution context was requested
978    pub dom_world: DOMWorldKind,
979    /// The if of the frame to get the `ExecutionContext` for
980    pub frame_id: Option<FrameId>,
981    /// Sender half of the channel to send the response back
982    pub tx: Sender<Option<ExecutionContextId>>,
983}
984
985impl GetExecutionContext {
986    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
987        Self {
988            dom_world: DOMWorldKind::Main,
989            frame_id: None,
990            tx,
991        }
992    }
993}
994
995#[derive(Debug)]
996pub struct GetUrl {
997    /// The id of the frame to get the url for (None = main frame)
998    pub frame_id: Option<FrameId>,
999    /// Sender half of the channel to send the response back
1000    pub tx: Sender<Option<String>>,
1001}
1002
1003impl GetUrl {
1004    pub fn new(tx: Sender<Option<String>>) -> Self {
1005        Self { frame_id: None, tx }
1006    }
1007}
1008
1009#[derive(Debug)]
1010pub struct GetName {
1011    /// The id of the frame to get the name for (None = main frame)
1012    pub frame_id: Option<FrameId>,
1013    /// Sender half of the channel to send the response back
1014    pub tx: Sender<Option<String>>,
1015}
1016
1017#[derive(Debug)]
1018pub struct GetParent {
1019    /// The id of the frame to get the parent for (None = main frame)
1020    pub frame_id: FrameId,
1021    /// Sender half of the channel to send the response back
1022    pub tx: Sender<Option<FrameId>>,
1023}
1024
1025#[derive(Debug)]
1026pub enum TargetMessage {
1027    /// Execute a command within the session of this target
1028    Command(CommandMessage),
1029    /// Return the main frame of this target's page
1030    MainFrame(Sender<Option<FrameId>>),
1031    /// Return all the frames of this target's page
1032    AllFrames(Sender<Vec<FrameId>>),
1033    #[cfg(feature = "_cache")]
1034    /// Set the cache key and policy for the target page.
1035    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
1036    /// Return the url if available
1037    Url(GetUrl),
1038    /// Return the name if available
1039    Name(GetName),
1040    /// Return the parent id of a frame
1041    Parent(GetParent),
1042    /// A Message that resolves when the frame finished loading a new url
1043    WaitForNavigation(Sender<ArcHttpRequest>),
1044    /// A Message that resolves when the frame network is idle
1045    WaitForNetworkIdle(Sender<ArcHttpRequest>),
1046    /// A Message that resolves when the frame network is almost idle
1047    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
1048    /// A request to submit a new listener that gets notified with every
1049    /// received event
1050    AddEventListener(EventListenerRequest),
1051    /// Get the `ExecutionContext` if available
1052    GetExecutionContext(GetExecutionContext),
1053    Authenticate(Credentials),
1054}