Skip to main content

chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::pin::Pin;
3use std::sync::Arc;
4use std::time::Instant;
5
6use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
7use futures::channel::oneshot::Sender;
8use futures::stream::Stream;
9use futures::task::{Context, Poll};
10
11use crate::auth::Credentials;
12use crate::cdp::browser_protocol::target::CloseTargetParams;
13use crate::cmd::CommandChain;
14use crate::cmd::CommandMessage;
15use crate::error::{CdpError, Result};
16use crate::handler::browser::BrowserContext;
17use crate::handler::domworld::DOMWorldKind;
18use crate::handler::emulation::EmulationManager;
19use crate::handler::frame::FrameRequestedNavigation;
20use crate::handler::frame::{
21    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
22};
23use crate::handler::network::{NetworkEvent, NetworkManager};
24use crate::handler::page::PageHandle;
25use crate::handler::viewport::Viewport;
26use crate::handler::{PageInner, REQUEST_TIMEOUT};
27use crate::listeners::{EventListenerRequest, EventListeners};
28use crate::{page::Page, ArcHttpRequest};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    browser::BrowserContextId,
31    log as cdplog,
32    page::{FrameId, GetFrameTreeParams},
33    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
34};
35use chromiumoxide_cdp::cdp::events::CdpEvent;
36use chromiumoxide_cdp::cdp::js_protocol::runtime::{
37    ExecutionContextId, RunIfWaitingForDebuggerParams,
38};
39use chromiumoxide_cdp::cdp::CdpEventMessage;
40use chromiumoxide_types::{Command, Method, Request, Response};
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42use std::time::Duration;
43
/// Polls the command chain `$cmds` of the current init state and returns from
/// the enclosing function with the outcome:
///
/// * chain not ready yet: `None`
/// * chain yields a command: a `TargetEvent::Request` carrying the target's
///   session id
/// * chain yields an error: the event produced by `on_initialization_failed`
/// * chain exhausted: `init_state` is set to `$next_state` and the target is
///   polled again
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds: ident, $next_state:expr ) => {{
        return match $cmds.poll($now) {
            Poll::Pending => None,
            Poll::Ready(None) => {
                $s.init_state = $next_state;
                $s.poll($cx, $now)
            }
            Poll::Ready(Some(Ok((method, params)))) => Some(TargetEvent::Request(Request {
                method,
                session_id: $s.session_id.clone().map(Into::into),
                params,
            })),
            Poll::Ready(Some(Err(_))) => Some($s.on_initialization_failed()),
        };
    }};
}
64
65lazy_static::lazy_static! {
66    /// Initial start command params.
67    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
68        if let Ok(attach) = SetAutoAttachParams::builder()
69            .flatten(true)
70            .auto_attach(true)
71            .wait_for_debugger_on_start(true)
72            .build() {
73                let disable_log = cdplog::DisableParams::default();
74
75                let mut cmds =  vec![
76                    (
77                        attach.identifier(),
78                        serde_json::to_value(attach).unwrap_or_default(),
79                    ),
80                    (
81                        disable_log.identifier(),
82                        serde_json::to_value(disable_log).unwrap_or_default(),
83                    )
84                ];
85
86                // enable performance on pages.
87                if cfg!(feature = "collect_metrics") {
88                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
89                    cmds.push((
90                        enable_performance.identifier(),
91                        serde_json::to_value(enable_performance).unwrap_or_default(),
92                    ));
93                }
94
95                cmds
96            } else {
97                vec![]
98            }
99    };
100
101    /// Attach to target commands
102    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
103        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
104
105        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
106    };
107}
108
/// State kept for a single browser target: its CDP info, the managers that
/// process its events, its initialization progress and the channels waiting on
/// it.
#[derive(Debug)]
pub struct Target {
    /// Info about this target as returned from the chromium instance
    info: TargetInfo,
    /// The type of this target, derived from `info.type` on construction
    r#type: TargetType,
    /// Configs for this target
    config: TargetConfig,
    /// The context this target is running in
    browser_context: BrowserContext,
    /// The frame manager that maintains the state of all frames and handles
    /// navigations of frames
    frame_manager: FrameManager,
    /// The network manager that receives the `Fetch*`/`Network*` events of this
    /// target and is configured from `config` on construction
    pub(crate) network_manager: NetworkManager,
    /// Provides the emulation init commands for the configured viewport
    emulation_manager: EmulationManager,
    /// The identifier of the session this target is attached to
    session_id: Option<SessionId>,
    /// The handle of the browser page of this target
    page: Option<PageHandle>,
    /// Drives this target towards initialization
    pub(crate) init_state: TargetInit,
    /// Currently queued events to report to the `Handler`
    queued_events: VecDeque<TargetEvent>,
    /// All registered event subscriptions
    event_listeners: EventListeners,
    /// Senders that need to be notified once the main frame has loaded
    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches `networkIdle`.
    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches
    /// `networkAlmostIdle`.
    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
    /// The sender who requested the page; it receives the page once the target
    /// is initialized, or an error if initialization fails.
    initiator: Option<Sender<Result<Page>>>,
}
144
145impl Target {
    /// Create a new target instance with `TargetInfo` after a
    /// `CreateTargetParams` request.
    ///
    /// The network manager is configured from `config` (cache, service workers,
    /// request interception, byte limit, extra headers, whitelist/blacklist
    /// patterns and the various blocking flags). The target starts without a
    /// session or page, in the `TargetInit::AttachToTarget` state.
    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
        let ty = TargetType::new(&info.r#type);
        // The same timeout is shared by the network, frame and emulation managers.
        let request_timeout: Duration = config.request_timeout;
        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);

        if !config.cache_enabled {
            network_manager.set_cache_enabled(false);
        }

        // NOTE(review): `true` is passed when service workers are *disabled* in the
        // config; presumably the argument means "bypass service workers" — confirm
        // against `NetworkManager::set_service_worker_enabled`.
        if !config.service_worker_enabled {
            network_manager.set_service_worker_enabled(true);
        }

        network_manager.set_request_interception(config.request_intercept);
        network_manager.max_bytes_allowed = config.max_bytes_allowed;

        if let Some(headers) = &config.extra_headers {
            network_manager.set_extra_headers(headers.clone());
        }

        if let Some(whitelist) = &config.whitelist_patterns {
            network_manager.set_whitelist_patterns(whitelist.clone());
        }

        if let Some(blacklist) = &config.blacklist_patterns {
            network_manager.set_blacklist_patterns(blacklist);
        }

        // Copy the resource blocking switches from the config.
        network_manager.ignore_visuals = config.ignore_visuals;
        network_manager.block_javascript = config.ignore_javascript;
        network_manager.block_analytics = config.ignore_analytics;
        network_manager.block_prefetch = config.ignore_prefetch;

        network_manager.block_stylesheets = config.ignore_stylesheets;
        network_manager.only_html = config.only_html;
        network_manager.intercept_manager = config.intercept_manager;

        Self {
            info,
            r#type: ty,
            config,
            frame_manager: FrameManager::new(request_timeout),
            network_manager,
            emulation_manager: EmulationManager::new(request_timeout),
            session_id: None,
            page: None,
            init_state: TargetInit::AttachToTarget,
            wait_for_frame_navigation: Default::default(),
            wait_for_network_idle: Default::default(),
            wait_for_network_almost_idle: Default::default(),
            queued_events: Default::default(),
            event_listeners: Default::default(),
            initiator: None,
            browser_context,
        }
    }
204
    /// Set the session id, replacing any previously stored one.
    pub fn set_session_id(&mut self, id: SessionId) {
        self.session_id = Some(id)
    }
209
    /// Get the session id, or `None` if no session id has been set yet.
    pub fn session_id(&self) -> Option<&SessionId> {
        self.session_id.as_ref()
    }
214
    /// Get mutable access to the stored session id, which allows callers to set,
    /// replace or clear it.
    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
        &mut self.session_id
    }
219
    /// Get the browser context this target was created with.
    pub fn browser_context(&self) -> &BrowserContext {
        &self.browser_context
    }
224
    /// The identifier for this target, taken from its `TargetInfo`.
    pub fn target_id(&self) -> &TargetId {
        &self.info.target_id
    }
229
    /// The type of this target, as derived from the `TargetInfo` type string when
    /// the target was created.
    pub fn r#type(&self) -> &TargetType {
        &self.r#type
    }
234
    /// Whether this target is already initialized, i.e. its init state is
    /// `TargetInit::Initialized`.
    pub fn is_initialized(&self) -> bool {
        matches!(self.init_state, TargetInit::Initialized)
    }
239
240    /// Navigate a frame
241    pub fn goto(&mut self, req: FrameRequestedNavigation) {
242        if self.network_manager.has_target_domain() {
243            self.network_manager.clear_target_domain();
244            let goto_url = req
245                .req
246                .params
247                .as_object()
248                .and_then(|o| o.get("url"))
249                .and_then(|v| v.as_str());
250
251            if let Some(url) = goto_url {
252                self.network_manager.set_page_url(url.into());
253            }
254        }
255        self.frame_manager.goto(req);
256    }
257
258    /// Create a new page from the session.
259    fn create_page(&mut self) {
260        if self.page.is_none() {
261            if let Some(session) = self.session_id.clone() {
262                let handle =
263                    PageHandle::new(self.target_id().clone(), session, self.opener_id().cloned());
264                self.page = Some(handle);
265            }
266        }
267    }
268
269    /// Tries to create the `PageInner` if this target is already initialized
270    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
271        self.create_page();
272        self.page.as_ref().map(|p| p.inner())
273    }
274
275    /// Is the target a page?
276    pub fn is_page(&self) -> bool {
277        self.r#type().is_page()
278    }
279
    /// The browser context ID recorded in this target's `TargetInfo`, if any.
    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
        self.info.browser_context_id.as_ref()
    }
284
    /// The `TargetInfo` this target was created with.
    pub fn info(&self) -> &TargetInfo {
        &self.info
    }
289
    /// Get the id of the target that opened this target, as recorded in its
    /// `TargetInfo`. Top-level targets return `None`.
    pub fn opener_id(&self) -> Option<&TargetId> {
        self.info.opener_id.as_ref()
    }
294
    /// Shared access to the frame manager of this target.
    pub fn frame_manager(&self) -> &FrameManager {
        &self.frame_manager
    }
298
    /// Mutable access to the frame manager of this target.
    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
        &mut self.frame_manager
    }
303
    /// Get mutable access to the event listeners registered on this target.
    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
        &mut self.event_listeners
    }
308
309    /// Received a response to a command issued by this target
310    pub fn on_response(&mut self, resp: Response, method: &str) {
311        if let Some(cmds) = self.init_state.commands_mut() {
312            cmds.received_response(method);
313        }
314
315        if let GetFrameTreeParams::IDENTIFIER = method {
316            if let Some(resp) = resp
317                .result
318                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
319            {
320                self.frame_manager.on_frame_tree(resp.frame_tree);
321            }
322        }
323        // requests originated from the network manager all return an empty response, hence they
324        // can be ignored here
325    }
326
    /// Dispatches an incoming CDP event to the manager responsible for it and
    /// then forwards it to the registered event listeners.
    ///
    /// Events listed as session scoped are dropped entirely (no manager and no
    /// listener sees them) unless both the event and this target carry a session
    /// id and the two are equal.
    pub fn on_event(&mut self, event: CdpEventMessage) {
        let CdpEventMessage {
            params,
            method,
            session_id,
            ..
        } = event;

        // Event kinds that are only processed when they belong to this target's session.
        let is_session_scoped = matches!(
            params,
            CdpEvent::FetchRequestPaused(_)
                | CdpEvent::FetchAuthRequired(_)
                | CdpEvent::NetworkRequestWillBeSent(_)
                | CdpEvent::NetworkResponseReceived(_)
                | CdpEvent::NetworkLoadingFinished(_)
                | CdpEvent::NetworkLoadingFailed(_)
                | CdpEvent::PageFrameAttached(_)
                | CdpEvent::PageFrameDetached(_)
                | CdpEvent::PageFrameNavigated(_)
                | CdpEvent::PageNavigatedWithinDocument(_)
                | CdpEvent::PageLifecycleEvent(_)
                | CdpEvent::PageFrameStartedLoading(_)
                | CdpEvent::PageFrameStoppedLoading(_)
                | CdpEvent::RuntimeExecutionContextCreated(_)
                | CdpEvent::RuntimeExecutionContextDestroyed(_)
                | CdpEvent::RuntimeExecutionContextsCleared(_)
                | CdpEvent::RuntimeBindingCalled(_)
        );

        if is_session_scoped {
            // A session scoped event without a session id cannot be attributed: drop it.
            let ev_sid: &str = match session_id.as_deref() {
                Some(s) => s,
                None => return,
            };

            // Without an own session id nothing can match either.
            let self_sid: &str = match self.session_id.as_ref() {
                Some(sid) => sid.as_ref(),
                None => return,
            };

            // The event belongs to a different session.
            if self_sid != ev_sid {
                return;
            }
        }

        match &params {
            // `FrameManager` events
            CdpEvent::PageFrameAttached(ev) => self
                .frame_manager
                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
            CdpEvent::PageFrameNavigated(ev) => {
                self.frame_manager.on_frame_navigated(&ev.frame);
            }
            CdpEvent::PageNavigatedWithinDocument(ev) => {
                self.frame_manager.on_frame_navigated_within_document(ev)
            }
            CdpEvent::RuntimeExecutionContextCreated(ev) => {
                self.frame_manager.on_frame_execution_context_created(ev)
            }
            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
                self.frame_manager.on_frame_execution_context_destroyed(ev)
            }
            CdpEvent::RuntimeExecutionContextsCleared(_) => {
                self.frame_manager.on_execution_contexts_cleared()
            }
            CdpEvent::RuntimeBindingCalled(ev) => {
                // TODO check if binding registered and payload is json
                self.frame_manager.on_runtime_binding_called(ev)
            }
            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
            CdpEvent::PageFrameStartedLoading(ev) => {
                self.frame_manager.on_frame_started_loading(ev);
            }
            CdpEvent::PageFrameStoppedLoading(ev) => {
                self.frame_manager.on_frame_stopped_loading(ev);
            }
            // `Target` events
            CdpEvent::TargetAttachedToTarget(ev) => {
                // An attached target that is paused waiting for the debugger is resumed
                // via the attached session.
                if ev.waiting_for_debugger {
                    let runtime_cmd = ATTACH_TARGET.clone();

                    self.queued_events.push_back(TargetEvent::Request(Request {
                        method: runtime_cmd.0,
                        session_id: Some(ev.session_id.clone().into()),
                        params: runtime_cmd.1,
                    }));
                }

                // Attached service workers are detached again; the detach request is
                // sent on this target's own session and names the attached session.
                if "service_worker" == &ev.target_info.r#type {
                    let detach_command = DetachFromTargetParams::builder()
                        .session_id(ev.session_id.clone())
                        .build();

                    let method = detach_command.identifier();

                    // The request is silently skipped if the params fail to serialize.
                    if let Ok(params) = serde_json::to_value(detach_command) {
                        self.queued_events.push_back(TargetEvent::Request(Request {
                            method,
                            session_id: self.session_id.clone().map(Into::into),
                            params,
                        }));
                    }
                }
            }
            // `NetworkManager` events
            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
            CdpEvent::NetworkRequestWillBeSent(ev) => {
                self.network_manager.on_request_will_be_sent(ev)
            }
            CdpEvent::NetworkRequestServedFromCache(ev) => {
                self.network_manager.on_request_served_from_cache(ev)
            }
            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
            CdpEvent::NetworkLoadingFinished(ev) => {
                self.network_manager.on_network_loading_finished(ev)
            }
            CdpEvent::NetworkLoadingFailed(ev) => {
                self.network_manager.on_network_loading_failed(ev)
            }
            _ => (),
        }
        // Finally hand the event to the listeners: typed events via `start_send`,
        // custom JSON events via `try_send_custom` (send failures are ignored).
        chromiumoxide_cdp::consume_event!(match params {
           |ev| self.event_listeners.start_send(ev),
           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
        });
    }
456
457    /// Called when a init command timed out
458    fn on_initialization_failed(&mut self) -> TargetEvent {
459        if let Some(initiator) = self.initiator.take() {
460            let _ = initiator.send(Err(CdpError::Timeout));
461        }
462        self.init_state = TargetInit::Closing;
463        let close_target = CloseTargetParams::new(self.info.target_id.clone());
464
465        TargetEvent::Request(Request {
466            method: close_target.identifier(),
467            session_id: self.session_id.clone().map(Into::into),
468            params: serde_json::to_value(close_target).unwrap_or_default(),
469        })
470    }
471
472    /// Advance that target's state
473    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
474        if !self.is_page() {
475            // can only poll pages
476            return None;
477        }
478
479        match &mut self.init_state {
480            TargetInit::AttachToTarget => {
481                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
482                    self.config.request_timeout,
483                ));
484
485                if let Ok(params) = AttachToTargetParams::builder()
486                    .target_id(self.target_id().clone())
487                    .flatten(true)
488                    .build()
489                {
490                    return Some(TargetEvent::Request(Request::new(
491                        params.identifier(),
492                        serde_json::to_value(params).unwrap_or_default(),
493                    )));
494                } else {
495                    return None;
496                }
497            }
498            TargetInit::InitializingFrame(cmds) => {
499                self.session_id.as_ref()?;
500                if let Poll::Ready(poll) = cmds.poll(now) {
501                    return match poll {
502                        None => {
503                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
504                                let world_name = world_name.clone();
505
506                                if let Some(isolated_world_cmds) =
507                                    self.frame_manager.ensure_isolated_world(&world_name)
508                                {
509                                    *cmds = isolated_world_cmds;
510                                } else {
511                                    self.init_state = TargetInit::InitializingNetwork(
512                                        self.network_manager.init_commands(),
513                                    );
514                                }
515                            } else {
516                                self.init_state = TargetInit::InitializingNetwork(
517                                    self.network_manager.init_commands(),
518                                );
519                            }
520                            self.poll(cx, now)
521                        }
522                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
523                            method,
524                            session_id: self.session_id.clone().map(Into::into),
525                            params,
526                        })),
527                        Some(Err(_)) => Some(self.on_initialization_failed()),
528                    };
529                } else {
530                    return None;
531                }
532            }
533            TargetInit::InitializingNetwork(cmds) => {
534                advance_state!(
535                    self,
536                    cx,
537                    now,
538                    cmds,
539                    TargetInit::InitializingPage(Self::page_init_commands(
540                        self.config.request_timeout
541                    ))
542                );
543            }
544            TargetInit::InitializingPage(cmds) => {
545                advance_state!(
546                    self,
547                    cx,
548                    now,
549                    cmds,
550                    match self.config.viewport.as_ref() {
551                        Some(viewport) => TargetInit::InitializingEmulation(
552                            self.emulation_manager.init_commands(viewport)
553                        ),
554                        None => TargetInit::Initialized,
555                    }
556                );
557            }
558            TargetInit::InitializingEmulation(cmds) => {
559                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
560            }
561            TargetInit::Initialized => {
562                if let Some(initiator) = self.initiator.take() {
563                    // make sure that the main frame of the page has finished loading
564                    if self
565                        .frame_manager
566                        .main_frame()
567                        .map(|frame| frame.is_loaded())
568                        .unwrap_or_default()
569                    {
570                        if let Some(page) = self.get_or_create_page() {
571                            let _ = initiator.send(Ok(page.clone().into()));
572                        } else {
573                            self.initiator = Some(initiator);
574                        }
575                    } else {
576                        self.initiator = Some(initiator);
577                    }
578                }
579            }
580            TargetInit::Closing => return None,
581        };
582
583        loop {
584            if self.init_state == TargetInit::Closing {
585                break None;
586            }
587
588            if let Some(frame) = self.frame_manager.main_frame() {
589                if frame.is_loaded() {
590                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
591                        let _ = tx.send(frame.http_request().cloned());
592                    }
593                }
594
595                if frame.is_network_idle() {
596                    while let Some(tx) = self.wait_for_network_idle.pop() {
597                        let _ = tx.send(frame.http_request().cloned());
598                    }
599                }
600
601                if frame.is_network_almost_idle() {
602                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
603                        let _ = tx.send(frame.http_request().cloned());
604                    }
605                }
606            }
607
608            // Drain queued messages first.
609            if let Some(ev) = self.queued_events.pop_front() {
610                return Some(ev);
611            }
612
613            if let Some(handle) = self.page.as_mut() {
614                while let Poll::Ready(Some(msg)) = Pin::new(&mut handle.rx).poll_next(cx) {
615                    if self.init_state == TargetInit::Closing {
616                        break;
617                    }
618
619                    match msg {
620                        TargetMessage::Command(cmd) => {
621                            if cmd.method == "Network.setBlockedURLs" {
622                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
623                                {
624                                    let mut unblock_all = false;
625                                    let mut block_all = false;
626
627                                    for s in arr.iter().filter_map(|v| v.as_str()) {
628                                        if s == "!*" {
629                                            unblock_all = true;
630                                            break; // "!*" overrides any block rules
631                                        }
632                                        if s.contains('*') {
633                                            block_all = true;
634                                        }
635                                    }
636
637                                    if unblock_all {
638                                        self.network_manager.set_block_all(false);
639                                    } else if block_all {
640                                        self.network_manager.set_block_all(true);
641                                    }
642                                }
643                            }
644                            self.queued_events.push_back(TargetEvent::Command(cmd));
645                        }
646                        TargetMessage::MainFrame(tx) => {
647                            let _ =
648                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
649                        }
650                        TargetMessage::AllFrames(tx) => {
651                            let _ = tx.send(
652                                self.frame_manager
653                                    .frames()
654                                    .map(|f| f.id().clone())
655                                    .collect(),
656                            );
657                        }
658                        #[cfg(feature = "_cache")]
659                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
660                            self.network_manager.set_cache_site_key(cache_key);
661                            self.network_manager.set_cache_policy(cache_policy);
662                        }
663                        TargetMessage::Url(req) => {
664                            let GetUrl { frame_id, tx } = req;
665                            let frame = if let Some(frame_id) = frame_id {
666                                self.frame_manager.frame(&frame_id)
667                            } else {
668                                self.frame_manager.main_frame()
669                            };
670                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
671                        }
672                        TargetMessage::Name(req) => {
673                            let GetName { frame_id, tx } = req;
674                            let frame = if let Some(frame_id) = frame_id {
675                                self.frame_manager.frame(&frame_id)
676                            } else {
677                                self.frame_manager.main_frame()
678                            };
679                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
680                        }
681                        TargetMessage::Parent(req) => {
682                            let GetParent { frame_id, tx } = req;
683                            let frame = self.frame_manager.frame(&frame_id);
684                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
685                        }
686                        TargetMessage::WaitForNavigation(tx) => {
687                            if let Some(frame) = self.frame_manager.main_frame() {
688                                if frame.is_loaded() {
689                                    let _ = tx.send(frame.http_request().cloned());
690                                } else {
691                                    self.wait_for_frame_navigation.push(tx);
692                                }
693                            } else {
694                                self.wait_for_frame_navigation.push(tx);
695                            }
696                        }
697                        TargetMessage::WaitForNetworkIdle(tx) => {
698                            if let Some(frame) = self.frame_manager.main_frame() {
699                                if frame.is_network_idle() {
700                                    let _ = tx.send(frame.http_request().cloned());
701                                } else {
702                                    self.wait_for_network_idle.push(tx);
703                                }
704                            } else {
705                                self.wait_for_network_idle.push(tx);
706                            }
707                        }
708                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
709                            if let Some(frame) = self.frame_manager.main_frame() {
710                                if frame.is_network_almost_idle() {
711                                    let _ = tx.send(frame.http_request().cloned());
712                                } else {
713                                    self.wait_for_network_almost_idle.push(tx);
714                                }
715                            } else {
716                                self.wait_for_network_almost_idle.push(tx);
717                            }
718                        }
719                        TargetMessage::AddEventListener(req) => {
720                            if req.method == "Fetch.requestPaused" {
721                                self.network_manager.enable_request_intercept();
722                            }
723                            // register a new listener
724                            self.event_listeners.add_listener(req);
725                        }
726                        TargetMessage::GetExecutionContext(ctx) => {
727                            let GetExecutionContext {
728                                dom_world,
729                                frame_id,
730                                tx,
731                            } = ctx;
732                            let frame = if let Some(frame_id) = frame_id {
733                                self.frame_manager.frame(&frame_id)
734                            } else {
735                                self.frame_manager.main_frame()
736                            };
737
738                            if let Some(frame) = frame {
739                                match dom_world {
740                                    DOMWorldKind::Main => {
741                                        let _ = tx.send(frame.main_world().execution_context());
742                                    }
743                                    DOMWorldKind::Secondary => {
744                                        let _ =
745                                            tx.send(frame.secondary_world().execution_context());
746                                    }
747                                }
748                            } else {
749                                let _ = tx.send(None);
750                            }
751                        }
752                        TargetMessage::Authenticate(credentials) => {
753                            self.network_manager.authenticate(credentials);
754                        }
755                        TargetMessage::BlockNetwork(blocked) => {
756                            self.network_manager.set_block_all(blocked);
757                        }
758                        TargetMessage::EnableInterception(enabled) => {
759                            // if interception is enabled disable the user facing handling.
760                            self.network_manager.user_request_interception_enabled = !enabled;
761                        }
762                    }
763                }
764            }
765
766            while let Some(event) = self.network_manager.poll() {
767                if self.init_state == TargetInit::Closing {
768                    break;
769                }
770                match event {
771                    NetworkEvent::SendCdpRequest((method, params)) => {
772                        // send a message to the browser
773                        self.queued_events.push_back(TargetEvent::Request(Request {
774                            method,
775                            session_id: self.session_id.clone().map(Into::into),
776                            params,
777                        }))
778                    }
779                    NetworkEvent::Request(_) => {}
780                    NetworkEvent::Response(_) => {}
781                    NetworkEvent::RequestFailed(request) => {
782                        self.frame_manager.on_http_request_finished(request);
783                    }
784                    NetworkEvent::RequestFinished(request) => {
785                        self.frame_manager.on_http_request_finished(request);
786                    }
787                    NetworkEvent::BytesConsumed(n) => {
788                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
789                    }
790                }
791            }
792
793            while let Some(event) = self.frame_manager.poll(now) {
794                if self.init_state == TargetInit::Closing {
795                    break;
796                }
797                match event {
798                    FrameEvent::NavigationResult(res) => {
799                        self.queued_events
800                            .push_back(TargetEvent::NavigationResult(res));
801                    }
802                    FrameEvent::NavigationRequest(id, req) => {
803                        self.queued_events
804                            .push_back(TargetEvent::NavigationRequest(id, req));
805                    }
806                }
807            }
808
809            if self.queued_events.is_empty() {
810                return None;
811            }
812        }
813    }
814
815    /// Set the sender half of the channel who requested the creation of this
816    /// target
817    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
818        self.initiator = Some(tx);
819    }
820
821    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
822        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
823    }
824}
825
826/// Configuration for how a single target/page should be fetched and processed.
827#[derive(Debug, Clone)]
828pub struct TargetConfig {
829    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
830    /// When `true`, connections will proceed even if certificate validation fails.
831    pub ignore_https_errors: bool,
832    /// Request timeout to use for the main navigation / resource fetch.
833    /// This is the total time allowed before a request is considered failed.
834    pub request_timeout: Duration,
835    /// Optional browser viewport to use for this target.
836    /// When `None`, the default viewport (or headless browser default) is used.
837    pub viewport: Option<Viewport>,
838    /// Enable request interception for this target.
839    /// When `true`, all network requests will pass through the intercept manager.
840    pub request_intercept: bool,
841    /// Enable caching for this target.
842    /// When `true`, responses may be read from and written to the cache layer.
843    pub cache_enabled: bool,
844    /// If `true`, skip visual/asset resources that are not required for HTML content
845    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
846    pub ignore_visuals: bool,
847    /// If `true`, block JavaScript execution (or avoid loading JS resources)
848    /// for this target. This is useful for purely static HTML crawls.
849    pub ignore_javascript: bool,
850    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
851    /// common tracker domains, etc.).
852    pub ignore_analytics: bool,
853    /// Ignore prefetching.
854    pub ignore_prefetch: bool,
855    /// If `true`, block stylesheets and related CSS resources for this target.
856    /// This can reduce bandwidth when only raw HTML is needed.
857    pub ignore_stylesheets: bool,
858    /// If `true`, only HTML documents will be fetched/kept.
859    /// Non-HTML subresources may be skipped entirely.
860    pub only_html: bool,
861    /// Whether service workers are allowed for this target.
862    /// When `true`, service workers may register and intercept requests.
863    pub service_worker_enabled: bool,
864    /// Extra HTTP headers to send with each request for this target.
865    /// Keys should be header names, values their corresponding header values.
866    pub extra_headers: Option<std::collections::HashMap<String, String>>,
867    /// Network intercept manager used to make allow/deny/modify decisions
868    /// for requests when `request_intercept` is enabled.
869    pub intercept_manager: NetworkInterceptManager,
870    /// The maximum number of response bytes allowed for this target.
871    /// When set, responses larger than this limit may be truncated or aborted.
872    pub max_bytes_allowed: Option<u64>,
873    /// Whitelist patterns to allow through the network.
874    pub whitelist_patterns: Option<Vec<String>>,
875    /// Blacklist patterns to black through the network.
876    pub blacklist_patterns: Option<Vec<String>>,
877}
878
879impl Default for TargetConfig {
880    fn default() -> Self {
881        Self {
882            ignore_https_errors: true,
883            request_timeout: Duration::from_secs(REQUEST_TIMEOUT),
884            viewport: Default::default(),
885            request_intercept: false,
886            cache_enabled: true,
887            service_worker_enabled: true,
888            ignore_javascript: false,
889            ignore_visuals: false,
890            ignore_stylesheets: false,
891            ignore_analytics: true,
892            ignore_prefetch: true,
893            only_html: false,
894            extra_headers: Default::default(),
895            intercept_manager: NetworkInterceptManager::Unknown,
896            max_bytes_allowed: None,
897            whitelist_patterns: None,
898            blacklist_patterns: None,
899        }
900    }
901}
902
/// The kind of a target, parsed from its type string.
///
/// See [`TargetType::new`] for the string each variant corresponds to.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Type string `"page"`.
    Page,
    /// Type string `"background_page"`.
    BackgroundPage,
    /// Type string `"service_worker"`.
    ServiceWorker,
    /// Type string `"shared_worker"`.
    SharedWorker,
    /// Type string `"other"`.
    Other,
    /// Type string `"browser"`.
    Browser,
    /// Type string `"webview"`.
    Webview,
    /// Any other type string, stored verbatim.
    Unknown(String),
}

impl TargetType {
    /// Parses a target type string.
    ///
    /// Matching is exact and case-sensitive. Strings that are not one of the
    /// known types are kept unchanged in [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => TargetType::Page,
            "background_page" => TargetType::BackgroundPage,
            "service_worker" => TargetType::ServiceWorker,
            "shared_worker" => TargetType::SharedWorker,
            "other" => TargetType::Other,
            "browser" => TargetType::Browser,
            "webview" => TargetType::Webview,
            s => TargetType::Unknown(s.to_string()),
        }
    }

    /// Returns `true` if this is [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        matches!(self, TargetType::Page)
    }

    /// Returns `true` if this is [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        matches!(self, TargetType::BackgroundPage)
    }

    /// Returns `true` if this is [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        matches!(self, TargetType::ServiceWorker)
    }

    /// Returns `true` if this is [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        matches!(self, TargetType::SharedWorker)
    }

    /// Returns `true` if this is [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        matches!(self, TargetType::Other)
    }

    /// Returns `true` if this is [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        matches!(self, TargetType::Browser)
    }

    /// Returns `true` if this is [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        matches!(self, TargetType::Webview)
    }
}
957
958#[derive(Debug)]
959pub(crate) enum TargetEvent {
960    /// An internal request
961    Request(Request),
962    /// An internal navigation request
963    NavigationRequest(NavigationId, Request),
964    /// Indicates that a previous requested navigation has finished
965    NavigationResult(Result<NavigationOk, NavigationError>),
966    /// A new command arrived via a channel
967    Command(CommandMessage),
968    /// The bytes consumed by the network.
969    BytesConsumed(u64),
970}
971
972// TODO this can be moved into the classes?
973#[derive(Debug, PartialEq)]
974pub enum TargetInit {
975    InitializingFrame(CommandChain),
976    InitializingNetwork(CommandChain),
977    InitializingPage(CommandChain),
978    InitializingEmulation(CommandChain),
979    AttachToTarget,
980    Initialized,
981    Closing,
982}
983
984impl TargetInit {
985    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
986        match self {
987            TargetInit::InitializingFrame(cmd) => Some(cmd),
988            TargetInit::InitializingNetwork(cmd) => Some(cmd),
989            TargetInit::InitializingPage(cmd) => Some(cmd),
990            TargetInit::InitializingEmulation(cmd) => Some(cmd),
991            TargetInit::AttachToTarget => None,
992            TargetInit::Initialized => None,
993            TargetInit::Closing => None,
994        }
995    }
996}
997
998#[derive(Debug)]
999pub struct GetExecutionContext {
1000    /// For which world the execution context was requested
1001    pub dom_world: DOMWorldKind,
1002    /// The if of the frame to get the `ExecutionContext` for
1003    pub frame_id: Option<FrameId>,
1004    /// Sender half of the channel to send the response back
1005    pub tx: Sender<Option<ExecutionContextId>>,
1006}
1007
1008impl GetExecutionContext {
1009    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
1010        Self {
1011            dom_world: DOMWorldKind::Main,
1012            frame_id: None,
1013            tx,
1014        }
1015    }
1016}
1017
1018#[derive(Debug)]
1019pub struct GetUrl {
1020    /// The id of the frame to get the url for (None = main frame)
1021    pub frame_id: Option<FrameId>,
1022    /// Sender half of the channel to send the response back
1023    pub tx: Sender<Option<String>>,
1024}
1025
1026impl GetUrl {
1027    pub fn new(tx: Sender<Option<String>>) -> Self {
1028        Self { frame_id: None, tx }
1029    }
1030}
1031
1032#[derive(Debug)]
1033pub struct GetName {
1034    /// The id of the frame to get the name for (None = main frame)
1035    pub frame_id: Option<FrameId>,
1036    /// Sender half of the channel to send the response back
1037    pub tx: Sender<Option<String>>,
1038}
1039
1040#[derive(Debug)]
1041pub struct GetParent {
1042    /// The id of the frame to get the parent for (None = main frame)
1043    pub frame_id: FrameId,
1044    /// Sender half of the channel to send the response back
1045    pub tx: Sender<Option<FrameId>>,
1046}
1047
1048#[derive(Debug)]
1049pub enum TargetMessage {
1050    /// Execute a command within the session of this target
1051    Command(CommandMessage),
1052    /// Return the main frame of this target's page
1053    MainFrame(Sender<Option<FrameId>>),
1054    /// Return all the frames of this target's page
1055    AllFrames(Sender<Vec<FrameId>>),
1056    #[cfg(feature = "_cache")]
1057    /// Set the cache key and policy for the target page.
1058    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
1059    /// Return the url if available
1060    Url(GetUrl),
1061    /// Return the name if available
1062    Name(GetName),
1063    /// Return the parent id of a frame
1064    Parent(GetParent),
1065    /// A Message that resolves when the frame finished loading a new url
1066    WaitForNavigation(Sender<ArcHttpRequest>),
1067    /// A Message that resolves when the frame network is idle
1068    WaitForNetworkIdle(Sender<ArcHttpRequest>),
1069    /// A Message that resolves when the frame network is almost idle
1070    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
1071    /// A request to submit a new listener that gets notified with every
1072    /// received event
1073    AddEventListener(EventListenerRequest),
1074    /// Get the `ExecutionContext` if available
1075    GetExecutionContext(GetExecutionContext),
1076    Authenticate(Credentials),
1077    /// Set block/unblocked networking
1078    BlockNetwork(bool),
1079    /// Enable/Disable internal request paused interception
1080    EnableInterception(bool),
1081}