Skip to main content

chromiumoxide/handler/
target.rs

1use std::collections::VecDeque;
2use std::sync::Arc;
3use std::time::Instant;
4
5use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
6use std::task::{Context, Poll};
7use tokio::sync::oneshot::Sender;
8
9use crate::auth::Credentials;
10use crate::cdp::browser_protocol::target::CloseTargetParams;
11use crate::cmd::CommandChain;
12use crate::cmd::CommandMessage;
13use crate::error::{CdpError, Result};
14use crate::handler::browser::BrowserContext;
15use crate::handler::domworld::DOMWorldKind;
16use crate::handler::emulation::EmulationManager;
17use crate::handler::frame::FrameRequestedNavigation;
18use crate::handler::frame::{
19    FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
20};
21use crate::handler::network::{NetworkEvent, NetworkManager};
22use crate::handler::page::PageHandle;
23use crate::handler::viewport::Viewport;
24use crate::handler::{PageInner, REQUEST_TIMEOUT};
25use crate::listeners::{EventListenerRequest, EventListeners};
26use crate::{page::Page, ArcHttpRequest};
27use chromiumoxide_cdp::cdp::browser_protocol::{
28    browser::BrowserContextId,
29    log as cdplog,
30    page::{FrameId, GetFrameTreeParams},
31    target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
32};
33use chromiumoxide_cdp::cdp::events::CdpEvent;
34use chromiumoxide_cdp::cdp::js_protocol::runtime::{
35    ExecutionContextId, RunIfWaitingForDebuggerParams,
36};
37use chromiumoxide_cdp::cdp::CdpEventMessage;
38use chromiumoxide_types::{Command, Method, Request, Response};
39use spider_network_blocker::intercept_manager::NetworkInterceptManager;
40use std::time::Duration;
41
// Polls the command chain of the current init stage and *returns from the
// enclosing function* with the outcome:
//
// * chain still pending        -> `None`
// * chain exhausted            -> store `$next` as the new `init_state` and
//                                 return whatever polling the target again yields
// * next command is ready      -> that command as a `TargetEvent::Request`
//                                 carrying the target's session id
// * chain reported an error    -> the event from `on_initialization_failed`
macro_rules! advance_state {
    ($target:ident, $ctx:ident, $at:ident, $chain:ident, $next:expr) => {{
        match $chain.poll($at) {
            Poll::Pending => return None,
            Poll::Ready(None) => {
                $target.init_state = $next;
                return $target.poll($ctx, $at);
            }
            Poll::Ready(Some(Ok((method, params)))) => {
                return Some(TargetEvent::Request(Request {
                    method,
                    session_id: $target.session_id.clone().map(Into::into),
                    params,
                }));
            }
            Poll::Ready(Some(Err(_))) => return Some($target.on_initialization_failed()),
        }
    }};
}
62
63lazy_static::lazy_static! {
64    /// Initial start command params.
65    static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
66        if let Ok(attach) = SetAutoAttachParams::builder()
67            .flatten(true)
68            .auto_attach(true)
69            .wait_for_debugger_on_start(true)
70            .build() {
71                let disable_log = cdplog::DisableParams::default();
72
73                let mut cmds =  vec![
74                    (
75                        attach.identifier(),
76                        serde_json::to_value(attach).unwrap_or_default(),
77                    ),
78                    (
79                        disable_log.identifier(),
80                        serde_json::to_value(disable_log).unwrap_or_default(),
81                    )
82                ];
83
84                // enable performance on pages.
85                if cfg!(feature = "collect_metrics") {
86                    let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
87                    cmds.push((
88                        enable_performance.identifier(),
89                        serde_json::to_value(enable_performance).unwrap_or_default(),
90                    ));
91                }
92
93                cmds
94            } else {
95                vec![]
96            }
97    };
98
99    /// Attach to target commands
100    static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
101        let runtime_cmd = RunIfWaitingForDebuggerParams::default();
102
103        (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
104    };
105}
106
/// State kept for a single browser target: its identity, the managers that
/// process its CDP events, its initialization progress and the channels of
/// callers waiting on it.
#[derive(Debug)]
pub struct Target {
    /// Info about this target as returned from the chromium instance
    info: TargetInfo,
    /// The type of this target, derived from `info.type` on construction
    r#type: TargetType,
    /// Configs for this target
    config: TargetConfig,
    /// The context this target is running in
    browser_context: BrowserContext,
    /// The frame manager that maintains the state of all frames and handles
    /// navigations of frames
    frame_manager: FrameManager,
    /// Handles the network side of the target (fetch/network events, cache,
    /// interception and blocking settings taken from the config)
    pub(crate) network_manager: NetworkManager,
    /// Provides the emulation init commands for the configured viewport
    emulation_manager: EmulationManager,
    /// The identifier of the session this target is attached to
    session_id: Option<SessionId>,
    /// The handle of the browser page of this target, created lazily once a
    /// session id is known
    page: Option<PageHandle>,
    /// Drives this target towards initialization
    pub(crate) init_state: TargetInit,
    /// Currently queued events to report to the `Handler`
    queued_events: VecDeque<TargetEvent>,
    /// All registered event subscriptions
    event_listeners: EventListeners,
    /// Senders that need to be notified once the main frame has loaded
    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches `networkIdle`.
    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
    /// Senders that need to be notified once the main frame reaches `networkAlmostIdle`.
    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
    /// The sender who requested the page; it receives the `Page` once the
    /// target is initialized, or a timeout error if initialization fails.
    initiator: Option<Sender<Result<Page>>>,
}
142
143impl Target {
144    /// Create a new target instance with `TargetInfo` after a
145    /// `CreateTargetParams` request.
146    pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
147        let ty = TargetType::new(&info.r#type);
148        let request_timeout: Duration = config.request_timeout;
149        let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
150
151        if !config.cache_enabled {
152            network_manager.set_cache_enabled(false);
153        }
154
155        if !config.service_worker_enabled {
156            network_manager.set_service_worker_enabled(true);
157        }
158
159        network_manager.set_request_interception(config.request_intercept);
160        network_manager.max_bytes_allowed = config.max_bytes_allowed;
161
162        if let Some(headers) = &config.extra_headers {
163            network_manager.set_extra_headers(headers.clone());
164        }
165
166        if let Some(whitelist) = &config.whitelist_patterns {
167            network_manager.set_whitelist_patterns(whitelist.clone());
168        }
169
170        if let Some(blacklist) = &config.blacklist_patterns {
171            network_manager.set_blacklist_patterns(blacklist);
172        }
173
174        network_manager.ignore_visuals = config.ignore_visuals;
175        network_manager.block_javascript = config.ignore_javascript;
176        network_manager.block_analytics = config.ignore_analytics;
177        network_manager.block_prefetch = config.ignore_prefetch;
178
179        network_manager.block_stylesheets = config.ignore_stylesheets;
180        network_manager.only_html = config.only_html;
181        network_manager.intercept_manager = config.intercept_manager;
182
183        Self {
184            info,
185            r#type: ty,
186            config,
187            frame_manager: FrameManager::new(request_timeout),
188            network_manager,
189            emulation_manager: EmulationManager::new(request_timeout),
190            session_id: None,
191            page: None,
192            init_state: TargetInit::AttachToTarget,
193            wait_for_frame_navigation: Default::default(),
194            wait_for_network_idle: Default::default(),
195            wait_for_network_almost_idle: Default::default(),
196            queued_events: Default::default(),
197            event_listeners: Default::default(),
198            initiator: None,
199            browser_context,
200        }
201    }
202
203    /// Set the session id.
204    pub fn set_session_id(&mut self, id: SessionId) {
205        self.session_id = Some(id)
206    }
207
208    /// Get the session id.
209    pub fn session_id(&self) -> Option<&SessionId> {
210        self.session_id.as_ref()
211    }
212
213    /// Get the session id mut.
214    pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
215        &mut self.session_id
216    }
217
218    /// Get the browser context.
219    pub fn browser_context(&self) -> &BrowserContext {
220        &self.browser_context
221    }
222
223    /// The identifier for this target
224    pub fn target_id(&self) -> &TargetId {
225        &self.info.target_id
226    }
227
228    /// The type of this target
229    pub fn r#type(&self) -> &TargetType {
230        &self.r#type
231    }
232
233    /// Whether this target is already initialized
234    pub fn is_initialized(&self) -> bool {
235        matches!(self.init_state, TargetInit::Initialized)
236    }
237
238    /// Navigate a frame
239    pub fn goto(&mut self, req: FrameRequestedNavigation) {
240        if self.network_manager.has_target_domain() {
241            self.network_manager.clear_target_domain();
242            let goto_url = req
243                .req
244                .params
245                .as_object()
246                .and_then(|o| o.get("url"))
247                .and_then(|v| v.as_str());
248
249            if let Some(url) = goto_url {
250                self.network_manager.set_page_url(url.into());
251            }
252        }
253        self.frame_manager.goto(req);
254    }
255
256    /// Create a new page from the session.
257    fn create_page(&mut self) {
258        if self.page.is_none() {
259            if let Some(session) = self.session_id.clone() {
260                let handle = PageHandle::new(
261                    self.target_id().clone(),
262                    session,
263                    self.opener_id().cloned(),
264                    self.config.request_timeout,
265                );
266                self.page = Some(handle);
267            }
268        }
269    }
270
271    /// Tries to create the `PageInner` if this target is already initialized
272    pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
273        self.create_page();
274        self.page.as_ref().map(|p| p.inner())
275    }
276
277    /// Is the target a page?
278    pub fn is_page(&self) -> bool {
279        self.r#type().is_page()
280    }
281
282    /// The browser context ID.
283    pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
284        self.info.browser_context_id.as_ref()
285    }
286
287    /// The target connection info.
288    pub fn info(&self) -> &TargetInfo {
289        &self.info
290    }
291
292    /// Get the target that opened this target. Top-level targets return `None`.
293    pub fn opener_id(&self) -> Option<&TargetId> {
294        self.info.opener_id.as_ref()
295    }
296
297    pub fn frame_manager(&self) -> &FrameManager {
298        &self.frame_manager
299    }
300
301    /// The frame manager.
302    pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
303        &mut self.frame_manager
304    }
305
306    /// Get event listeners mutably.
307    pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
308        &mut self.event_listeners
309    }
310
311    /// Received a response to a command issued by this target
312    pub fn on_response(&mut self, resp: Response, method: &str) {
313        if let Some(cmds) = self.init_state.commands_mut() {
314            cmds.received_response(method);
315        }
316
317        if let GetFrameTreeParams::IDENTIFIER = method {
318            if let Some(resp) = resp
319                .result
320                .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
321            {
322                self.frame_manager.on_frame_tree(resp.frame_tree);
323            }
324        }
325        // requests originated from the network manager all return an empty response, hence they
326        // can be ignored here
327    }
328
329    /// On CDP Event message.
330    pub fn on_event(&mut self, event: CdpEventMessage) {
331        let CdpEventMessage {
332            params,
333            method,
334            session_id,
335            ..
336        } = event;
337
338        let is_session_scoped = matches!(
339            params,
340            CdpEvent::FetchRequestPaused(_)
341                | CdpEvent::FetchAuthRequired(_)
342                | CdpEvent::NetworkRequestWillBeSent(_)
343                | CdpEvent::NetworkResponseReceived(_)
344                | CdpEvent::NetworkLoadingFinished(_)
345                | CdpEvent::NetworkLoadingFailed(_)
346                | CdpEvent::PageFrameAttached(_)
347                | CdpEvent::PageFrameDetached(_)
348                | CdpEvent::PageFrameNavigated(_)
349                | CdpEvent::PageNavigatedWithinDocument(_)
350                | CdpEvent::PageLifecycleEvent(_)
351                | CdpEvent::PageFrameStartedLoading(_)
352                | CdpEvent::PageFrameStoppedLoading(_)
353                | CdpEvent::RuntimeExecutionContextCreated(_)
354                | CdpEvent::RuntimeExecutionContextDestroyed(_)
355                | CdpEvent::RuntimeExecutionContextsCleared(_)
356                | CdpEvent::RuntimeBindingCalled(_)
357        );
358
359        if is_session_scoped {
360            let ev_sid: &str = match session_id.as_deref() {
361                Some(s) => s,
362                None => return,
363            };
364
365            let self_sid: &str = match self.session_id.as_ref() {
366                Some(sid) => sid.as_ref(),
367                None => return,
368            };
369
370            if self_sid != ev_sid {
371                return;
372            }
373        }
374
375        match &params {
376            // `FrameManager` events
377            CdpEvent::PageFrameAttached(ev) => self
378                .frame_manager
379                .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
380            CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
381            CdpEvent::PageFrameNavigated(ev) => {
382                self.frame_manager.on_frame_navigated(&ev.frame);
383            }
384            CdpEvent::PageNavigatedWithinDocument(ev) => {
385                self.frame_manager.on_frame_navigated_within_document(ev)
386            }
387            CdpEvent::RuntimeExecutionContextCreated(ev) => {
388                self.frame_manager.on_frame_execution_context_created(ev)
389            }
390            CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
391                self.frame_manager.on_frame_execution_context_destroyed(ev)
392            }
393            CdpEvent::RuntimeExecutionContextsCleared(_) => {
394                self.frame_manager.on_execution_contexts_cleared()
395            }
396            CdpEvent::RuntimeBindingCalled(ev) => {
397                // TODO check if binding registered and payload is json
398                self.frame_manager.on_runtime_binding_called(ev)
399            }
400            CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
401            CdpEvent::PageFrameStartedLoading(ev) => {
402                self.frame_manager.on_frame_started_loading(ev);
403            }
404            CdpEvent::PageFrameStoppedLoading(ev) => {
405                self.frame_manager.on_frame_stopped_loading(ev);
406            }
407            // `Target` events
408            CdpEvent::TargetAttachedToTarget(ev) => {
409                if ev.waiting_for_debugger {
410                    let runtime_cmd = ATTACH_TARGET.clone();
411
412                    self.queued_events.push_back(TargetEvent::Request(Request {
413                        method: runtime_cmd.0,
414                        session_id: Some(ev.session_id.clone().into()),
415                        params: runtime_cmd.1,
416                    }));
417                }
418
419                if "service_worker" == &ev.target_info.r#type {
420                    let detach_command = DetachFromTargetParams::builder()
421                        .session_id(ev.session_id.clone())
422                        .build();
423
424                    let method = detach_command.identifier();
425
426                    if let Ok(params) = serde_json::to_value(detach_command) {
427                        self.queued_events.push_back(TargetEvent::Request(Request {
428                            method,
429                            session_id: self.session_id.clone().map(Into::into),
430                            params,
431                        }));
432                    }
433                }
434            }
435            // `NetworkManager` events
436            CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
437            CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
438            CdpEvent::NetworkRequestWillBeSent(ev) => {
439                self.network_manager.on_request_will_be_sent(ev)
440            }
441            CdpEvent::NetworkRequestServedFromCache(ev) => {
442                self.network_manager.on_request_served_from_cache(ev)
443            }
444            CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
445            CdpEvent::NetworkLoadingFinished(ev) => {
446                self.network_manager.on_network_loading_finished(ev)
447            }
448            CdpEvent::NetworkLoadingFailed(ev) => {
449                self.network_manager.on_network_loading_failed(ev)
450            }
451            _ => (),
452        }
453        chromiumoxide_cdp::consume_event!(match params {
454           |ev| self.event_listeners.start_send(ev),
455           |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
456        });
457    }
458
459    /// Called when a init command timed out
460    fn on_initialization_failed(&mut self) -> TargetEvent {
461        if let Some(initiator) = self.initiator.take() {
462            let _ = initiator.send(Err(CdpError::Timeout));
463        }
464        self.init_state = TargetInit::Closing;
465        let close_target = CloseTargetParams::new(self.info.target_id.clone());
466
467        TargetEvent::Request(Request {
468            method: close_target.identifier(),
469            session_id: self.session_id.clone().map(Into::into),
470            params: serde_json::to_value(close_target).unwrap_or_default(),
471        })
472    }
473
474    /// Advance that target's state
475    pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
476        if !self.is_page() {
477            // can only poll pages
478            return None;
479        }
480
481        match &mut self.init_state {
482            TargetInit::AttachToTarget => {
483                self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
484                    self.config.request_timeout,
485                ));
486
487                if let Ok(params) = AttachToTargetParams::builder()
488                    .target_id(self.target_id().clone())
489                    .flatten(true)
490                    .build()
491                {
492                    return Some(TargetEvent::Request(Request::new(
493                        params.identifier(),
494                        serde_json::to_value(params).unwrap_or_default(),
495                    )));
496                } else {
497                    return None;
498                }
499            }
500            TargetInit::InitializingFrame(cmds) => {
501                self.session_id.as_ref()?;
502                if let Poll::Ready(poll) = cmds.poll(now) {
503                    return match poll {
504                        None => {
505                            if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
506                                let world_name = world_name.clone();
507
508                                if let Some(isolated_world_cmds) =
509                                    self.frame_manager.ensure_isolated_world(&world_name)
510                                {
511                                    *cmds = isolated_world_cmds;
512                                } else {
513                                    self.init_state = TargetInit::InitializingNetwork(
514                                        self.network_manager.init_commands(),
515                                    );
516                                }
517                            } else {
518                                self.init_state = TargetInit::InitializingNetwork(
519                                    self.network_manager.init_commands(),
520                                );
521                            }
522                            self.poll(cx, now)
523                        }
524                        Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
525                            method,
526                            session_id: self.session_id.clone().map(Into::into),
527                            params,
528                        })),
529                        Some(Err(_)) => Some(self.on_initialization_failed()),
530                    };
531                } else {
532                    return None;
533                }
534            }
535            TargetInit::InitializingNetwork(cmds) => {
536                advance_state!(
537                    self,
538                    cx,
539                    now,
540                    cmds,
541                    TargetInit::InitializingPage(Self::page_init_commands(
542                        self.config.request_timeout
543                    ))
544                );
545            }
546            TargetInit::InitializingPage(cmds) => {
547                advance_state!(
548                    self,
549                    cx,
550                    now,
551                    cmds,
552                    match self.config.viewport.as_ref() {
553                        Some(viewport) => TargetInit::InitializingEmulation(
554                            self.emulation_manager.init_commands(viewport)
555                        ),
556                        None => TargetInit::Initialized,
557                    }
558                );
559            }
560            TargetInit::InitializingEmulation(cmds) => {
561                advance_state!(self, cx, now, cmds, TargetInit::Initialized);
562            }
563            TargetInit::Initialized => {
564                if let Some(initiator) = self.initiator.take() {
565                    // make sure that the main frame of the page has finished loading
566                    if self
567                        .frame_manager
568                        .main_frame()
569                        .map(|frame| frame.is_loaded())
570                        .unwrap_or_default()
571                    {
572                        if let Some(page) = self.get_or_create_page() {
573                            let _ = initiator.send(Ok(page.clone().into()));
574                        } else {
575                            self.initiator = Some(initiator);
576                        }
577                    } else {
578                        self.initiator = Some(initiator);
579                    }
580                }
581            }
582            TargetInit::Closing => return None,
583        };
584
585        loop {
586            if self.init_state == TargetInit::Closing {
587                break None;
588            }
589
590            if let Some(frame) = self.frame_manager.main_frame() {
591                if frame.is_loaded() {
592                    while let Some(tx) = self.wait_for_frame_navigation.pop() {
593                        let _ = tx.send(frame.http_request().cloned());
594                    }
595                }
596
597                if frame.is_network_idle() {
598                    while let Some(tx) = self.wait_for_network_idle.pop() {
599                        let _ = tx.send(frame.http_request().cloned());
600                    }
601                }
602
603                if frame.is_network_almost_idle() {
604                    while let Some(tx) = self.wait_for_network_almost_idle.pop() {
605                        let _ = tx.send(frame.http_request().cloned());
606                    }
607                }
608            }
609
610            // Drain queued messages first.
611            if let Some(ev) = self.queued_events.pop_front() {
612                return Some(ev);
613            }
614
615            if let Some(handle) = self.page.as_mut() {
616                while let Poll::Ready(Some(msg)) = handle.rx.poll_recv(cx) {
617                    if self.init_state == TargetInit::Closing {
618                        break;
619                    }
620
621                    match msg {
622                        TargetMessage::Command(cmd) => {
623                            if cmd.method == "Network.setBlockedURLs" {
624                                if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
625                                {
626                                    let mut unblock_all = false;
627                                    let mut block_all = false;
628
629                                    for s in arr.iter().filter_map(|v| v.as_str()) {
630                                        if s == "!*" {
631                                            unblock_all = true;
632                                            break; // "!*" overrides any block rules
633                                        }
634                                        if s.contains('*') {
635                                            block_all = true;
636                                        }
637                                    }
638
639                                    if unblock_all {
640                                        self.network_manager.set_block_all(false);
641                                    } else if block_all {
642                                        self.network_manager.set_block_all(true);
643                                    }
644                                }
645                            }
646                            self.queued_events.push_back(TargetEvent::Command(cmd));
647                        }
648                        TargetMessage::MainFrame(tx) => {
649                            let _ =
650                                tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
651                        }
652                        TargetMessage::AllFrames(tx) => {
653                            let _ = tx.send(
654                                self.frame_manager
655                                    .frames()
656                                    .map(|f| f.id().clone())
657                                    .collect(),
658                            );
659                        }
660                        #[cfg(feature = "_cache")]
661                        TargetMessage::CacheKey((cache_key, cache_policy)) => {
662                            self.network_manager.set_cache_site_key(cache_key);
663                            self.network_manager.set_cache_policy(cache_policy);
664                        }
665                        TargetMessage::Url(req) => {
666                            let GetUrl { frame_id, tx } = req;
667                            let frame = if let Some(frame_id) = frame_id {
668                                self.frame_manager.frame(&frame_id)
669                            } else {
670                                self.frame_manager.main_frame()
671                            };
672                            let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
673                        }
674                        TargetMessage::Name(req) => {
675                            let GetName { frame_id, tx } = req;
676                            let frame = if let Some(frame_id) = frame_id {
677                                self.frame_manager.frame(&frame_id)
678                            } else {
679                                self.frame_manager.main_frame()
680                            };
681                            let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
682                        }
683                        TargetMessage::Parent(req) => {
684                            let GetParent { frame_id, tx } = req;
685                            let frame = self.frame_manager.frame(&frame_id);
686                            let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
687                        }
688                        TargetMessage::WaitForNavigation(tx) => {
689                            if let Some(frame) = self.frame_manager.main_frame() {
690                                if frame.is_loaded() {
691                                    let _ = tx.send(frame.http_request().cloned());
692                                } else {
693                                    self.wait_for_frame_navigation.push(tx);
694                                }
695                            } else {
696                                self.wait_for_frame_navigation.push(tx);
697                            }
698                        }
699                        TargetMessage::WaitForNetworkIdle(tx) => {
700                            if let Some(frame) = self.frame_manager.main_frame() {
701                                if frame.is_network_idle() {
702                                    let _ = tx.send(frame.http_request().cloned());
703                                } else {
704                                    self.wait_for_network_idle.push(tx);
705                                }
706                            } else {
707                                self.wait_for_network_idle.push(tx);
708                            }
709                        }
710                        TargetMessage::WaitForNetworkAlmostIdle(tx) => {
711                            if let Some(frame) = self.frame_manager.main_frame() {
712                                if frame.is_network_almost_idle() {
713                                    let _ = tx.send(frame.http_request().cloned());
714                                } else {
715                                    self.wait_for_network_almost_idle.push(tx);
716                                }
717                            } else {
718                                self.wait_for_network_almost_idle.push(tx);
719                            }
720                        }
721                        TargetMessage::AddEventListener(req) => {
722                            if req.method == "Fetch.requestPaused" {
723                                self.network_manager.enable_request_intercept();
724                            }
725                            // register a new listener
726                            self.event_listeners.add_listener(req);
727                        }
728                        TargetMessage::GetExecutionContext(ctx) => {
729                            let GetExecutionContext {
730                                dom_world,
731                                frame_id,
732                                tx,
733                            } = ctx;
734                            let frame = if let Some(frame_id) = frame_id {
735                                self.frame_manager.frame(&frame_id)
736                            } else {
737                                self.frame_manager.main_frame()
738                            };
739
740                            if let Some(frame) = frame {
741                                match dom_world {
742                                    DOMWorldKind::Main => {
743                                        let _ = tx.send(frame.main_world().execution_context());
744                                    }
745                                    DOMWorldKind::Secondary => {
746                                        let _ =
747                                            tx.send(frame.secondary_world().execution_context());
748                                    }
749                                }
750                            } else {
751                                let _ = tx.send(None);
752                            }
753                        }
754                        TargetMessage::Authenticate(credentials) => {
755                            self.network_manager.authenticate(credentials);
756                        }
757                        TargetMessage::BlockNetwork(blocked) => {
758                            self.network_manager.set_block_all(blocked);
759                        }
760                        TargetMessage::EnableInterception(enabled) => {
761                            // if interception is enabled disable the user facing handling.
762                            self.network_manager.user_request_interception_enabled = !enabled;
763                        }
764                    }
765                }
766            }
767
768            while let Some(event) = self.network_manager.poll() {
769                if self.init_state == TargetInit::Closing {
770                    break;
771                }
772                match event {
773                    NetworkEvent::SendCdpRequest((method, params)) => {
774                        // send a message to the browser
775                        self.queued_events.push_back(TargetEvent::Request(Request {
776                            method,
777                            session_id: self.session_id.clone().map(Into::into),
778                            params,
779                        }))
780                    }
781                    NetworkEvent::Request(_) => {}
782                    NetworkEvent::Response(_) => {}
783                    NetworkEvent::RequestFailed(request) => {
784                        self.frame_manager.on_http_request_finished(request);
785                    }
786                    NetworkEvent::RequestFinished(request) => {
787                        self.frame_manager.on_http_request_finished(request);
788                    }
789                    NetworkEvent::BytesConsumed(n) => {
790                        self.queued_events.push_back(TargetEvent::BytesConsumed(n));
791                    }
792                }
793            }
794
795            while let Some(event) = self.frame_manager.poll(now) {
796                if self.init_state == TargetInit::Closing {
797                    break;
798                }
799                match event {
800                    FrameEvent::NavigationResult(res) => {
801                        self.queued_events
802                            .push_back(TargetEvent::NavigationResult(res));
803                    }
804                    FrameEvent::NavigationRequest(id, req) => {
805                        self.queued_events
806                            .push_back(TargetEvent::NavigationRequest(id, req));
807                    }
808                }
809            }
810
811            if self.queued_events.is_empty() {
812                return None;
813            }
814        }
815    }
816
817    /// Set the sender half of the channel who requested the creation of this
818    /// target
819    pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
820        self.initiator = Some(tx);
821    }
822
823    pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
824        CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
825    }
826}
827
/// Configuration for how a single target/page should be fetched and processed.
///
/// See the `Default` implementation for the values used when a field is not
/// set explicitly.
#[derive(Debug, Clone)]
pub struct TargetConfig {
    /// Whether to ignore TLS/HTTPS certificate errors (e.g. self-signed or expired certs).
    /// When `true`, connections will proceed even if certificate validation fails.
    pub ignore_https_errors: bool,
    /// Request timeout to use for the main navigation / resource fetch.
    /// This is the total time allowed before a request is considered failed.
    pub request_timeout: Duration,
    /// Optional browser viewport to use for this target.
    /// When `None`, the default viewport (or headless browser default) is used.
    pub viewport: Option<Viewport>,
    /// Enable request interception for this target.
    /// When `true`, all network requests will pass through the intercept manager.
    pub request_intercept: bool,
    /// Enable caching for this target.
    /// When `true`, responses may be read from and written to the cache layer.
    pub cache_enabled: bool,
    /// If `true`, skip visual/asset resources that are not required for HTML content
    /// (e.g. images, fonts, media). Useful for performance-oriented crawls.
    pub ignore_visuals: bool,
    /// If `true`, block JavaScript execution (or avoid loading JS resources)
    /// for this target. This is useful for purely static HTML crawls.
    pub ignore_javascript: bool,
    /// If `true`, block analytics / tracking requests (e.g. Google Analytics,
    /// common tracker domains, etc.).
    pub ignore_analytics: bool,
    /// If `true`, ignore prefetch requests for this target.
    // NOTE(review): the exact effect is decided by the network layer, which is
    // not visible from this definition; confirm before relying on it.
    pub ignore_prefetch: bool,
    /// If `true`, block stylesheets and related CSS resources for this target.
    /// This can reduce bandwidth when only raw HTML is needed.
    pub ignore_stylesheets: bool,
    /// If `true`, only HTML documents will be fetched/kept.
    /// Non-HTML subresources may be skipped entirely.
    pub only_html: bool,
    /// Whether service workers are allowed for this target.
    /// When `true`, service workers may register and intercept requests.
    pub service_worker_enabled: bool,
    /// Extra HTTP headers to send with each request for this target.
    /// Keys should be header names, values their corresponding header values.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Network intercept manager used to make allow/deny/modify decisions
    /// for requests when `request_intercept` is enabled.
    pub intercept_manager: NetworkInterceptManager,
    /// The maximum number of response bytes allowed for this target.
    /// When set, responses larger than this limit may be truncated or aborted.
    pub max_bytes_allowed: Option<u64>,
    /// Whitelist patterns to allow through the network.
    pub whitelist_patterns: Option<Vec<String>>,
    /// Blacklist patterns to block from the network.
    pub blacklist_patterns: Option<Vec<String>>,
}
880
881impl Default for TargetConfig {
882    fn default() -> Self {
883        Self {
884            ignore_https_errors: true,
885            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
886            viewport: Default::default(),
887            request_intercept: false,
888            cache_enabled: true,
889            service_worker_enabled: true,
890            ignore_javascript: false,
891            ignore_visuals: false,
892            ignore_stylesheets: false,
893            ignore_analytics: true,
894            ignore_prefetch: true,
895            only_html: false,
896            extra_headers: Default::default(),
897            intercept_manager: NetworkInterceptManager::Unknown,
898            max_bytes_allowed: None,
899            whitelist_patterns: None,
900            blacklist_patterns: None,
901        }
902    }
903}
904
/// Classification of a target, parsed from its type string by
/// [`TargetType::new`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Type string `"page"`.
    Page,
    /// Type string `"background_page"`.
    BackgroundPage,
    /// Type string `"service_worker"`.
    ServiceWorker,
    /// Type string `"shared_worker"`.
    SharedWorker,
    /// Type string `"other"`.
    Other,
    /// Type string `"browser"`.
    Browser,
    /// Type string `"webview"`.
    Webview,
    /// Any type string that is not recognized; the string is kept verbatim.
    Unknown(String),
}

impl TargetType {
    /// Parse a target type string.
    ///
    /// Matching is exact and case-sensitive; anything that is not one of the
    /// known names becomes [`TargetType::Unknown`] holding a copy of `ty`.
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognized => Self::Unknown(unrecognized.to_owned()),
        }
    }

    /// `true` if this is [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        *self == Self::Page
    }

    /// `true` if this is [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        *self == Self::BackgroundPage
    }

    /// `true` if this is [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        *self == Self::ServiceWorker
    }

    /// `true` if this is [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        *self == Self::SharedWorker
    }

    /// `true` if this is [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        *self == Self::Other
    }

    /// `true` if this is [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        *self == Self::Browser
    }

    /// `true` if this is [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        *self == Self::Webview
    }
}
959
/// Events a target queues while it is polled.
#[derive(Debug)]
pub(crate) enum TargetEvent {
    /// An internal request
    Request(Request),
    /// An internal navigation request
    NavigationRequest(NavigationId, Request),
    /// Indicates that a previously requested navigation has finished
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A new command arrived via a channel
    Command(CommandMessage),
    /// The bytes consumed by the network.
    BytesConsumed(u64),
}
973
// TODO this can be moved into the classes?
/// Initialization state of a target.
///
/// The `Initializing*` variants each carry the [`CommandChain`] that belongs
/// to that state; the remaining variants carry no data.
#[derive(Debug, PartialEq)]
pub enum TargetInit {
    /// Frame initialization, with its command chain.
    InitializingFrame(CommandChain),
    /// Network initialization, with its command chain.
    InitializingNetwork(CommandChain),
    /// Page initialization, with its command chain.
    InitializingPage(CommandChain),
    /// Emulation initialization, with its command chain.
    InitializingEmulation(CommandChain),
    /// Attach-to-target state; carries no command chain.
    AttachToTarget,
    /// Initialization is complete.
    Initialized,
    /// The target is closing. The polling code stops draining network and
    /// frame events while in this state.
    Closing,
}
985
986impl TargetInit {
987    fn commands_mut(&mut self) -> Option<&mut CommandChain> {
988        match self {
989            TargetInit::InitializingFrame(cmd) => Some(cmd),
990            TargetInit::InitializingNetwork(cmd) => Some(cmd),
991            TargetInit::InitializingPage(cmd) => Some(cmd),
992            TargetInit::InitializingEmulation(cmd) => Some(cmd),
993            TargetInit::AttachToTarget => None,
994            TargetInit::Initialized => None,
995            TargetInit::Closing => None,
996        }
997    }
998}
999
/// Request for the execution context id of a frame in a given DOM world.
#[derive(Debug)]
pub struct GetExecutionContext {
    /// For which world the execution context was requested
    pub dom_world: DOMWorldKind,
    /// The id of the frame to get the `ExecutionContext` for
    /// (`None` = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<ExecutionContextId>>,
}
1009
1010impl GetExecutionContext {
1011    pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
1012        Self {
1013            dom_world: DOMWorldKind::Main,
1014            frame_id: None,
1015            tx,
1016        }
1017    }
1018}
1019
/// Request for the url of a frame.
#[derive(Debug)]
pub struct GetUrl {
    /// The id of the frame to get the url for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<String>>,
}
1027
1028impl GetUrl {
1029    pub fn new(tx: Sender<Option<String>>) -> Self {
1030        Self { frame_id: None, tx }
1031    }
1032}
1033
/// Request for the name of a frame.
#[derive(Debug)]
pub struct GetName {
    /// The id of the frame to get the name for (None = main frame)
    pub frame_id: Option<FrameId>,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<String>>,
}
1041
/// Request for the parent of a frame.
#[derive(Debug)]
pub struct GetParent {
    /// The id of the frame to get the parent for. Unlike the other frame
    /// requests this id is mandatory, so there is no main-frame fallback.
    pub frame_id: FrameId,
    /// Sender half of the channel to send the response back
    pub tx: Sender<Option<FrameId>>,
}
1049
/// Messages that can be sent to a target.
#[derive(Debug)]
pub enum TargetMessage {
    /// Execute a command within the session of this target
    Command(CommandMessage),
    /// Return the main frame of this target's page
    MainFrame(Sender<Option<FrameId>>),
    /// Return all the frames of this target's page
    AllFrames(Sender<Vec<FrameId>>),
    #[cfg(feature = "_cache")]
    /// Set the cache key and policy for the target page.
    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
    /// Return the url if available
    Url(GetUrl),
    /// Return the name if available
    Name(GetName),
    /// Return the parent id of a frame
    Parent(GetParent),
    /// A Message that resolves when the frame finished loading a new url
    WaitForNavigation(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is idle.
    /// Answered right away if the main frame is already network idle,
    /// otherwise the sender is queued until later.
    WaitForNetworkIdle(Sender<ArcHttpRequest>),
    /// A Message that resolves when the frame network is almost idle.
    /// Answered right away if the main frame is already almost idle,
    /// otherwise the sender is queued until later.
    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
    /// A request to submit a new listener that gets notified with every
    /// received event. Registering a listener for `Fetch.requestPaused`
    /// also enables request interception on the network manager.
    AddEventListener(EventListenerRequest),
    /// Get the `ExecutionContext` if available
    GetExecutionContext(GetExecutionContext),
    /// Hand the given credentials to the network manager for authentication
    Authenticate(Credentials),
    /// Set block/unblocked networking
    BlockNetwork(bool),
    /// Enable/Disable internal request paused interception.
    /// The network manager's user-facing interception flag is set to the
    /// negation of this value.
    EnableInterception(bool),
}