1use std::collections::VecDeque;
2use std::sync::Arc;
3use std::time::Instant;
4
5use chromiumoxide_cdp::cdp::browser_protocol::target::DetachFromTargetParams;
6use std::task::{Context, Poll};
7use tokio::sync::oneshot::Sender;
8
9use tokio::sync::Notify;
10
11use crate::auth::Credentials;
12use crate::cdp::browser_protocol::target::CloseTargetParams;
13use crate::cmd::CommandChain;
14use crate::cmd::CommandMessage;
15use crate::error::{CdpError, Result};
16use crate::handler::browser::BrowserContext;
17use crate::handler::domworld::DOMWorldKind;
18use crate::handler::emulation::EmulationManager;
19use crate::handler::frame::FrameRequestedNavigation;
20use crate::handler::frame::{
21 FrameEvent, FrameManager, NavigationError, NavigationId, NavigationOk,
22};
23use crate::handler::network::{NetworkEvent, NetworkManager};
24use crate::handler::page::PageHandle;
25use crate::handler::viewport::Viewport;
26use crate::handler::{PageInner, REQUEST_TIMEOUT};
27use crate::listeners::{EventListenerRequest, EventListeners};
28use crate::{page::Page, ArcHttpRequest};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30 browser::BrowserContextId,
31 log as cdplog,
32 page::{FrameId, GetFrameTreeParams},
33 target::{AttachToTargetParams, SessionId, SetAutoAttachParams, TargetId, TargetInfo},
34};
35use chromiumoxide_cdp::cdp::events::CdpEvent;
36use chromiumoxide_cdp::cdp::js_protocol::runtime::{
37 ExecutionContextId, RunIfWaitingForDebuggerParams,
38};
39use chromiumoxide_cdp::cdp::CdpEventMessage;
40use chromiumoxide_types::{Command, Method, Request, Response};
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42use std::time::Duration;
43
/// Polls the command chain `$cmds` of the current init state and returns from
/// the enclosing function with the outcome:
///
/// * chain still pending -> `None`
/// * chain exhausted -> switch `$s.init_state` to `$next_state` and poll `$s` again
/// * next command available -> a `TargetEvent::Request` carrying the target's session id
/// * chain failed -> the event produced by `on_initialization_failed`
macro_rules! advance_state {
    ($s:ident, $cx:ident, $now:ident, $cmds: ident, $next_state:expr ) => {{
        return match $cmds.poll($now) {
            Poll::Pending => None,
            Poll::Ready(None) => {
                $s.init_state = $next_state;
                $s.poll($cx, $now)
            }
            Poll::Ready(Some(Ok((method, params)))) => Some(TargetEvent::Request(Request {
                method,
                session_id: $s.session_id.clone().map(Into::into),
                params,
            })),
            Poll::Ready(Some(Err(_))) => Some($s.on_initialization_failed()),
        };
    }};
}
64
65lazy_static::lazy_static! {
66 static ref INIT_COMMANDS_PARAMS: Vec<(chromiumoxide_types::MethodId, serde_json::Value)> = {
68 if let Ok(attach) = SetAutoAttachParams::builder()
69 .flatten(true)
70 .auto_attach(true)
71 .wait_for_debugger_on_start(true)
72 .build() {
73 let disable_log = cdplog::DisableParams::default();
74
75 let mut cmds = vec![
76 (
77 attach.identifier(),
78 serde_json::to_value(attach).unwrap_or_default(),
79 ),
80 (
81 disable_log.identifier(),
82 serde_json::to_value(disable_log).unwrap_or_default(),
83 )
84 ];
85
86 if cfg!(feature = "collect_metrics") {
88 let enable_performance = chromiumoxide_cdp::cdp::browser_protocol::performance::EnableParams::default();
89 cmds.push((
90 enable_performance.identifier(),
91 serde_json::to_value(enable_performance).unwrap_or_default(),
92 ));
93 }
94
95 cmds
96 } else {
97 vec![]
98 }
99 };
100
101 static ref ATTACH_TARGET: (chromiumoxide_types::MethodId, serde_json::Value) = {
103 let runtime_cmd = RunIfWaitingForDebuggerParams::default();
104
105 (runtime_cmd.identifier(), serde_json::to_value(runtime_cmd).unwrap_or_default())
106 };
107}
108
/// State kept for a single browser target: its CDP info, the managers that
/// track frames, network and emulation, and the bookkeeping needed to drive
/// initialization and answer page requests.
#[derive(Debug)]
pub struct Target {
    /// Target info this target was created from.
    info: TargetInfo,
    /// Target type derived from `info.r#type` in `new`.
    r#type: TargetType,
    /// Configuration the managers were set up with.
    config: TargetConfig,
    /// Browser context passed to `new`.
    browser_context: BrowserContext,
    /// Tracks frames, navigations and execution contexts.
    frame_manager: FrameManager,
    /// Tracks network activity and interception settings.
    pub(crate) network_manager: NetworkManager,
    /// Produces the emulation init commands when a viewport is configured.
    emulation_manager: EmulationManager,
    /// Session id attached to outgoing requests; `None` until set.
    session_id: Option<SessionId>,
    /// Page handle, created lazily once a session id is available.
    page: Option<PageHandle>,
    /// Current step of the initialization state machine.
    pub(crate) init_state: TargetInit,
    /// Events waiting to be returned from `poll`.
    queued_events: VecDeque<TargetEvent>,
    /// Listeners that receive the CDP events delivered to `on_event`.
    event_listeners: EventListeners,
    /// Waiters resolved once the main frame is loaded.
    wait_for_frame_navigation: Vec<Sender<ArcHttpRequest>>,
    /// Waiters resolved once the main frame reports DOM content loaded.
    wait_for_dom_content_loaded: Vec<Sender<ArcHttpRequest>>,
    /// Waiters resolved once the main frame reports network idle.
    wait_for_network_idle: Vec<Sender<ArcHttpRequest>>,
    /// Waiters resolved once the main frame reports network almost idle.
    wait_for_network_almost_idle: Vec<Sender<ArcHttpRequest>>,
    /// Receives the `Page` once initialization finished, or an error if it failed.
    initiator: Option<Sender<Result<Page>>>,
}
146
147impl Target {
148 pub fn new(info: TargetInfo, config: TargetConfig, browser_context: BrowserContext) -> Self {
151 let ty = TargetType::new(&info.r#type);
152 let request_timeout: Duration = config.request_timeout;
153 let mut network_manager = NetworkManager::new(config.ignore_https_errors, request_timeout);
154
155 if !config.cache_enabled {
156 network_manager.set_cache_enabled(false);
157 }
158
159 if !config.service_worker_enabled {
160 network_manager.set_service_worker_enabled(true);
161 }
162
163 network_manager.set_request_interception(config.request_intercept);
164 network_manager.max_bytes_allowed = config.max_bytes_allowed;
165
166 if let Some(headers) = &config.extra_headers {
167 network_manager.set_extra_headers(headers.clone());
168 }
169
170 if let Some(whitelist) = &config.whitelist_patterns {
171 network_manager.set_whitelist_patterns(whitelist.clone());
172 }
173
174 if let Some(blacklist) = &config.blacklist_patterns {
175 network_manager.set_blacklist_patterns(blacklist);
176 }
177
178 network_manager.ignore_visuals = config.ignore_visuals;
179 network_manager.block_javascript = config.ignore_javascript;
180 network_manager.block_analytics = config.ignore_analytics;
181 network_manager.block_prefetch = config.ignore_prefetch;
182
183 network_manager.block_stylesheets = config.ignore_stylesheets;
184 network_manager.only_html = config.only_html;
185 network_manager.intercept_manager = config.intercept_manager;
186
187 #[cfg(feature = "adblock")]
188 if let Some(rules) = &config.adblock_filter_rules {
189 use adblock::lists::{FilterSet, ParseOptions, RuleTypes};
190
191 let mut filter_set = FilterSet::new(false);
192 let mut opts = ParseOptions::default();
193 opts.rule_types = RuleTypes::All;
194
195 filter_set.add_filters(
197 &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
198 opts.clone(),
199 );
200 filter_set.add_filters(rules.iter().map(|s| s.as_str()), opts);
202
203 let engine = adblock::Engine::from_filter_set(filter_set, true);
204 network_manager.set_adblock_engine(std::sync::Arc::new(engine));
205 }
206
207 Self {
208 info,
209 r#type: ty,
210 config,
211 frame_manager: FrameManager::new(request_timeout),
212 network_manager,
213 emulation_manager: EmulationManager::new(request_timeout),
214 session_id: None,
215 page: None,
216 init_state: TargetInit::AttachToTarget,
217 wait_for_frame_navigation: Default::default(),
218 wait_for_dom_content_loaded: Default::default(),
219 wait_for_network_idle: Default::default(),
220 wait_for_network_almost_idle: Default::default(),
221 queued_events: Default::default(),
222 event_listeners: Default::default(),
223 initiator: None,
224 browser_context,
225 }
226 }
227
228 pub fn set_session_id(&mut self, id: SessionId) {
230 self.session_id = Some(id)
231 }
232
233 pub fn session_id(&self) -> Option<&SessionId> {
235 self.session_id.as_ref()
236 }
237
238 pub fn session_id_mut(&mut self) -> &mut Option<SessionId> {
240 &mut self.session_id
241 }
242
243 pub fn browser_context(&self) -> &BrowserContext {
245 &self.browser_context
246 }
247
248 pub fn target_id(&self) -> &TargetId {
250 &self.info.target_id
251 }
252
253 pub fn r#type(&self) -> &TargetType {
255 &self.r#type
256 }
257
258 pub fn is_initialized(&self) -> bool {
260 matches!(self.init_state, TargetInit::Initialized)
261 }
262
263 pub fn goto(&mut self, req: FrameRequestedNavigation) {
265 if self.network_manager.has_target_domain() {
266 self.network_manager.clear_target_domain();
267 let goto_url = req
268 .req
269 .params
270 .as_object()
271 .and_then(|o| o.get("url"))
272 .and_then(|v| v.as_str());
273
274 if let Some(url) = goto_url {
275 self.network_manager.set_page_url(url.into());
276 }
277 }
278 self.frame_manager.goto(req);
279 }
280
281 fn create_page(&mut self) {
283 if self.page.is_none() {
284 if let Some(session) = self.session_id.clone() {
285 let handle = PageHandle::new(
286 self.target_id().clone(),
287 session,
288 self.opener_id().cloned(),
289 self.config.request_timeout,
290 self.config.page_wake.clone(),
291 );
292 self.page = Some(handle);
293 }
294 }
295 }
296
297 pub(crate) fn get_or_create_page(&mut self) -> Option<&Arc<PageInner>> {
299 self.create_page();
300 self.page.as_ref().map(|p| p.inner())
301 }
302
303 pub(crate) fn page_mut(&mut self) -> Option<&mut PageHandle> {
305 self.page.as_mut()
306 }
307
308 pub fn is_page(&self) -> bool {
310 self.r#type().is_page()
311 }
312
313 pub fn browser_context_id(&self) -> Option<&BrowserContextId> {
315 self.info.browser_context_id.as_ref()
316 }
317
318 pub fn info(&self) -> &TargetInfo {
320 &self.info
321 }
322
323 pub fn opener_id(&self) -> Option<&TargetId> {
325 self.info.opener_id.as_ref()
326 }
327
328 pub fn frame_manager(&self) -> &FrameManager {
329 &self.frame_manager
330 }
331
332 pub fn frame_manager_mut(&mut self) -> &mut FrameManager {
334 &mut self.frame_manager
335 }
336
337 pub fn event_listeners_mut(&mut self) -> &mut EventListeners {
339 &mut self.event_listeners
340 }
341
342 pub fn on_response(&mut self, resp: Response, method: &str) {
344 if let Some(cmds) = self.init_state.commands_mut() {
345 cmds.received_response(method);
346 }
347
348 if let GetFrameTreeParams::IDENTIFIER = method {
349 if let Some(resp) = resp
350 .result
351 .and_then(|val| GetFrameTreeParams::response_from_value(val).ok())
352 {
353 self.frame_manager.on_frame_tree(resp.frame_tree);
354 }
355 }
356 }
359
360 pub fn on_event(&mut self, event: CdpEventMessage) {
362 let CdpEventMessage {
363 params,
364 method,
365 session_id,
366 ..
367 } = event;
368
369 let is_session_scoped = matches!(
370 params,
371 CdpEvent::FetchRequestPaused(_)
372 | CdpEvent::FetchAuthRequired(_)
373 | CdpEvent::NetworkRequestWillBeSent(_)
374 | CdpEvent::NetworkResponseReceived(_)
375 | CdpEvent::NetworkLoadingFinished(_)
376 | CdpEvent::NetworkLoadingFailed(_)
377 | CdpEvent::PageFrameAttached(_)
378 | CdpEvent::PageFrameDetached(_)
379 | CdpEvent::PageFrameNavigated(_)
380 | CdpEvent::PageNavigatedWithinDocument(_)
381 | CdpEvent::PageLifecycleEvent(_)
382 | CdpEvent::PageFrameStartedLoading(_)
383 | CdpEvent::PageFrameStoppedLoading(_)
384 | CdpEvent::RuntimeExecutionContextCreated(_)
385 | CdpEvent::RuntimeExecutionContextDestroyed(_)
386 | CdpEvent::RuntimeExecutionContextsCleared(_)
387 | CdpEvent::RuntimeBindingCalled(_)
388 );
389
390 if is_session_scoped {
391 let ev_sid: &str = match session_id.as_deref() {
392 Some(s) => s,
393 None => return,
394 };
395
396 let self_sid: &str = match self.session_id.as_ref() {
397 Some(sid) => sid.as_ref(),
398 None => return,
399 };
400
401 if self_sid != ev_sid {
402 return;
403 }
404 }
405
406 match ¶ms {
407 CdpEvent::PageFrameAttached(ev) => self
409 .frame_manager
410 .on_frame_attached(ev.frame_id.clone(), Some(ev.parent_frame_id.clone())),
411 CdpEvent::PageFrameDetached(ev) => self.frame_manager.on_frame_detached(ev),
412 CdpEvent::PageFrameNavigated(ev) => {
413 self.frame_manager.on_frame_navigated(&ev.frame);
414 }
415 CdpEvent::PageNavigatedWithinDocument(ev) => {
416 self.frame_manager.on_frame_navigated_within_document(ev)
417 }
418 CdpEvent::RuntimeExecutionContextCreated(ev) => {
419 self.frame_manager.on_frame_execution_context_created(ev)
420 }
421 CdpEvent::RuntimeExecutionContextDestroyed(ev) => {
422 self.frame_manager.on_frame_execution_context_destroyed(ev)
423 }
424 CdpEvent::RuntimeExecutionContextsCleared(_) => {
425 self.frame_manager.on_execution_contexts_cleared()
426 }
427 CdpEvent::RuntimeBindingCalled(ev) => {
428 self.frame_manager.on_runtime_binding_called(ev)
430 }
431 CdpEvent::PageLifecycleEvent(ev) => self.frame_manager.on_page_lifecycle_event(ev),
432 CdpEvent::PageFrameStartedLoading(ev) => {
433 self.frame_manager.on_frame_started_loading(ev);
434 }
435 CdpEvent::PageFrameStoppedLoading(ev) => {
436 self.frame_manager.on_frame_stopped_loading(ev);
437 }
438 CdpEvent::TargetAttachedToTarget(ev) => {
440 if ev.waiting_for_debugger {
441 let runtime_cmd = ATTACH_TARGET.clone();
442
443 self.queued_events.push_back(TargetEvent::Request(Request {
444 method: runtime_cmd.0,
445 session_id: Some(ev.session_id.clone().into()),
446 params: runtime_cmd.1,
447 }));
448 }
449
450 if "service_worker" == &ev.target_info.r#type {
451 let detach_command = DetachFromTargetParams::builder()
452 .session_id(ev.session_id.clone())
453 .build();
454
455 let method = detach_command.identifier();
456
457 if let Ok(params) = serde_json::to_value(detach_command) {
458 self.queued_events.push_back(TargetEvent::Request(Request {
459 method,
460 session_id: self.session_id.clone().map(Into::into),
461 params,
462 }));
463 }
464 }
465 }
466 CdpEvent::FetchRequestPaused(ev) => self.network_manager.on_fetch_request_paused(ev),
468 CdpEvent::FetchAuthRequired(ev) => self.network_manager.on_fetch_auth_required(ev),
469 CdpEvent::NetworkRequestWillBeSent(ev) => {
470 self.network_manager.on_request_will_be_sent(ev)
471 }
472 CdpEvent::NetworkRequestServedFromCache(ev) => {
473 self.network_manager.on_request_served_from_cache(ev)
474 }
475 CdpEvent::NetworkResponseReceived(ev) => self.network_manager.on_response_received(ev),
476 CdpEvent::NetworkLoadingFinished(ev) => {
477 self.network_manager.on_network_loading_finished(ev)
478 }
479 CdpEvent::NetworkLoadingFailed(ev) => {
480 self.network_manager.on_network_loading_failed(ev)
481 }
482 _ => (),
483 }
484 chromiumoxide_cdp::consume_event!(match params {
485 |ev| self.event_listeners.start_send(ev),
486 |json| { let _ = self.event_listeners.try_send_custom(&method, json);}
487 });
488 }
489
490 fn on_initialization_failed(&mut self) -> TargetEvent {
492 if let Some(initiator) = self.initiator.take() {
493 let _ = initiator.send(Err(CdpError::Timeout));
494 }
495 self.init_state = TargetInit::Closing;
496 let close_target = CloseTargetParams::new(self.info.target_id.clone());
497
498 TargetEvent::Request(Request {
499 method: close_target.identifier(),
500 session_id: self.session_id.clone().map(Into::into),
501 params: serde_json::to_value(close_target).unwrap_or_default(),
502 })
503 }
504
505 pub(crate) fn poll(&mut self, cx: &mut Context<'_>, now: Instant) -> Option<TargetEvent> {
507 if !self.is_page() {
508 return None;
510 }
511
512 match &mut self.init_state {
513 TargetInit::AttachToTarget => {
514 self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
515 self.config.request_timeout,
516 ));
517
518 if let Ok(params) = AttachToTargetParams::builder()
519 .target_id(self.target_id().clone())
520 .flatten(true)
521 .build()
522 {
523 return Some(TargetEvent::Request(Request::new(
524 params.identifier(),
525 serde_json::to_value(params).unwrap_or_default(),
526 )));
527 } else {
528 return None;
529 }
530 }
531 TargetInit::InitializingFrame(cmds) => {
532 self.session_id.as_ref()?;
533 if let Poll::Ready(poll) = cmds.poll(now) {
534 return match poll {
535 None => {
536 if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
537 let world_name = world_name.clone();
538
539 if let Some(isolated_world_cmds) =
540 self.frame_manager.ensure_isolated_world(&world_name)
541 {
542 *cmds = isolated_world_cmds;
543 } else {
544 self.init_state = TargetInit::InitializingNetwork(
545 self.network_manager.init_commands(),
546 );
547 }
548 } else {
549 self.init_state = TargetInit::InitializingNetwork(
550 self.network_manager.init_commands(),
551 );
552 }
553 self.poll(cx, now)
554 }
555 Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
556 method,
557 session_id: self.session_id.clone().map(Into::into),
558 params,
559 })),
560 Some(Err(_)) => Some(self.on_initialization_failed()),
561 };
562 } else {
563 return None;
564 }
565 }
566 TargetInit::InitializingNetwork(cmds) => {
567 advance_state!(
568 self,
569 cx,
570 now,
571 cmds,
572 TargetInit::InitializingPage(Self::page_init_commands(
573 self.config.request_timeout
574 ))
575 );
576 }
577 TargetInit::InitializingPage(cmds) => {
578 advance_state!(
579 self,
580 cx,
581 now,
582 cmds,
583 match self.config.viewport.as_ref() {
584 Some(viewport) => TargetInit::InitializingEmulation(
585 self.emulation_manager.init_commands(viewport)
586 ),
587 None => TargetInit::Initialized,
588 }
589 );
590 }
591 TargetInit::InitializingEmulation(cmds) => {
592 advance_state!(self, cx, now, cmds, TargetInit::Initialized);
593 }
594 TargetInit::Initialized => {
595 if let Some(initiator) = self.initiator.take() {
596 if self
598 .frame_manager
599 .main_frame()
600 .map(|frame| frame.is_loaded())
601 .unwrap_or_default()
602 {
603 if let Some(page) = self.get_or_create_page() {
604 let _ = initiator.send(Ok(page.clone().into()));
605 } else {
606 self.initiator = Some(initiator);
607 }
608 } else {
609 self.initiator = Some(initiator);
610 }
611 }
612 }
613 TargetInit::Closing => return None,
614 };
615
616 if !self.wait_for_frame_navigation.is_empty() {
620 self.wait_for_frame_navigation.retain(|tx| !tx.is_closed());
621 }
622 if !self.wait_for_dom_content_loaded.is_empty() {
623 self.wait_for_dom_content_loaded
624 .retain(|tx| !tx.is_closed());
625 }
626 if !self.wait_for_network_idle.is_empty() {
627 self.wait_for_network_idle.retain(|tx| !tx.is_closed());
628 }
629 if !self.wait_for_network_almost_idle.is_empty() {
630 self.wait_for_network_almost_idle
631 .retain(|tx| !tx.is_closed());
632 }
633
634 loop {
635 if self.init_state == TargetInit::Closing {
636 break None;
637 }
638
639 if let Some(frame) = self.frame_manager.main_frame() {
640 if frame.is_dom_content_loaded() {
641 while let Some(tx) = self.wait_for_dom_content_loaded.pop() {
642 let _ = tx.send(frame.http_request().cloned());
643 }
644 }
645
646 if frame.is_loaded() {
647 while let Some(tx) = self.wait_for_frame_navigation.pop() {
648 let _ = tx.send(frame.http_request().cloned());
649 }
650 }
651
652 if frame.is_network_idle() {
653 while let Some(tx) = self.wait_for_network_idle.pop() {
654 let _ = tx.send(frame.http_request().cloned());
655 }
656 }
657
658 if frame.is_network_almost_idle() {
659 while let Some(tx) = self.wait_for_network_almost_idle.pop() {
660 let _ = tx.send(frame.http_request().cloned());
661 }
662 }
663 }
664
665 if let Some(ev) = self.queued_events.pop_front() {
667 return Some(ev);
668 }
669
670 if let Some(handle) = self.page.as_mut() {
671 while let Poll::Ready(Some(msg)) = handle.rx.poll_recv(cx) {
672 if self.init_state == TargetInit::Closing {
673 break;
674 }
675
676 match msg {
677 TargetMessage::Command(cmd) => {
678 if cmd.method == "Network.setBlockedURLs" {
679 if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array())
680 {
681 let mut unblock_all = false;
682 let mut block_all = false;
683
684 for s in arr.iter().filter_map(|v| v.as_str()) {
685 if s == "!*" {
686 unblock_all = true;
687 break; }
689 if s.contains('*') {
690 block_all = true;
691 }
692 }
693
694 if unblock_all {
695 self.network_manager.set_block_all(false);
696 } else if block_all {
697 self.network_manager.set_block_all(true);
698 }
699 }
700 }
701 self.queued_events.push_back(TargetEvent::Command(cmd));
702 }
703 TargetMessage::MainFrame(tx) => {
704 let _ =
705 tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
706 }
707 TargetMessage::AllFrames(tx) => {
708 let _ = tx.send(
709 self.frame_manager
710 .frames()
711 .map(|f| f.id().clone())
712 .collect(),
713 );
714 }
715 #[cfg(feature = "_cache")]
716 TargetMessage::CacheKey((cache_key, cache_policy)) => {
717 self.network_manager.set_cache_site_key(cache_key);
718 self.network_manager.set_cache_policy(cache_policy);
719 }
720 TargetMessage::Url(req) => {
721 let GetUrl { frame_id, tx } = req;
722 let frame = if let Some(frame_id) = frame_id {
723 self.frame_manager.frame(&frame_id)
724 } else {
725 self.frame_manager.main_frame()
726 };
727 let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
728 }
729 TargetMessage::Name(req) => {
730 let GetName { frame_id, tx } = req;
731 let frame = if let Some(frame_id) = frame_id {
732 self.frame_manager.frame(&frame_id)
733 } else {
734 self.frame_manager.main_frame()
735 };
736 let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
737 }
738 TargetMessage::Parent(req) => {
739 let GetParent { frame_id, tx } = req;
740 let frame = self.frame_manager.frame(&frame_id);
741 let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
742 }
743 TargetMessage::WaitForNavigation(tx) => {
744 if let Some(frame) = self.frame_manager.main_frame() {
745 if frame.is_loaded() {
746 let _ = tx.send(frame.http_request().cloned());
747 } else {
748 self.wait_for_frame_navigation.push(tx);
749 }
750 } else {
751 self.wait_for_frame_navigation.push(tx);
752 }
753 }
754 TargetMessage::WaitForDomContentLoaded(tx) => {
755 if let Some(frame) = self.frame_manager.main_frame() {
756 if frame.is_dom_content_loaded() {
757 let _ = tx.send(frame.http_request().cloned());
758 } else {
759 self.wait_for_dom_content_loaded.push(tx);
760 }
761 } else {
762 self.wait_for_dom_content_loaded.push(tx);
763 }
764 }
765 TargetMessage::WaitForNetworkIdle(tx) => {
766 if let Some(frame) = self.frame_manager.main_frame() {
767 if frame.is_network_idle() {
768 let _ = tx.send(frame.http_request().cloned());
769 } else {
770 self.wait_for_network_idle.push(tx);
771 }
772 } else {
773 self.wait_for_network_idle.push(tx);
774 }
775 }
776 TargetMessage::WaitForNetworkAlmostIdle(tx) => {
777 if let Some(frame) = self.frame_manager.main_frame() {
778 if frame.is_network_almost_idle() {
779 let _ = tx.send(frame.http_request().cloned());
780 } else {
781 self.wait_for_network_almost_idle.push(tx);
782 }
783 } else {
784 self.wait_for_network_almost_idle.push(tx);
785 }
786 }
787 TargetMessage::AddEventListener(req) => {
788 if req.method == "Fetch.requestPaused" {
789 self.network_manager.enable_request_intercept();
790 }
791 self.event_listeners.add_listener(req);
793 }
794 TargetMessage::GetExecutionContext(ctx) => {
795 let GetExecutionContext {
796 dom_world,
797 frame_id,
798 tx,
799 } = ctx;
800 let frame = if let Some(frame_id) = frame_id {
801 self.frame_manager.frame(&frame_id)
802 } else {
803 self.frame_manager.main_frame()
804 };
805
806 if let Some(frame) = frame {
807 match dom_world {
808 DOMWorldKind::Main => {
809 let _ = tx.send(frame.main_world().execution_context());
810 }
811 DOMWorldKind::Secondary => {
812 let _ =
813 tx.send(frame.secondary_world().execution_context());
814 }
815 }
816 } else {
817 let _ = tx.send(None);
818 }
819 }
820 TargetMessage::Authenticate(credentials) => {
821 self.network_manager.authenticate(credentials);
822 }
823 TargetMessage::BlockNetwork(blocked) => {
824 self.network_manager.set_block_all(blocked);
825 }
826 TargetMessage::EnableInterception(enabled) => {
827 self.network_manager.user_request_interception_enabled = !enabled;
829 }
830 }
831 }
832 }
833
834 while let Some(event) = self.network_manager.poll() {
835 if self.init_state == TargetInit::Closing {
836 break;
837 }
838 match event {
839 NetworkEvent::SendCdpRequest((method, params)) => {
840 self.queued_events.push_back(TargetEvent::Request(Request {
842 method,
843 session_id: self.session_id.clone().map(Into::into),
844 params,
845 }))
846 }
847 NetworkEvent::Request(_) => {}
848 NetworkEvent::Response(_) => {}
849 NetworkEvent::RequestFailed(request) => {
850 self.frame_manager.on_http_request_finished(request);
851 }
852 NetworkEvent::RequestFinished(request) => {
853 self.frame_manager.on_http_request_finished(request);
854 }
855 NetworkEvent::BytesConsumed(n) => {
856 self.queued_events.push_back(TargetEvent::BytesConsumed(n));
857 }
858 }
859 }
860
861 while let Some(event) = self.frame_manager.poll(now) {
862 if self.init_state == TargetInit::Closing {
863 break;
864 }
865 match event {
866 FrameEvent::NavigationResult(res) => {
867 self.queued_events
868 .push_back(TargetEvent::NavigationResult(res));
869 }
870 FrameEvent::NavigationRequest(id, req) => {
871 self.queued_events
872 .push_back(TargetEvent::NavigationRequest(id, req));
873 }
874 }
875 }
876
877 if self.queued_events.is_empty() {
878 return None;
879 }
880 }
881 }
882
883 pub(crate) fn on_page_message(&mut self, msg: TargetMessage) {
887 if self.init_state == TargetInit::Closing {
888 return;
889 }
890 match msg {
891 TargetMessage::Command(cmd) => {
892 if cmd.method == "Network.setBlockedURLs" {
893 if let Some(arr) = cmd.params.get("urls").and_then(|v| v.as_array()) {
894 let mut unblock_all = false;
895 let mut block_all = false;
896 for s in arr.iter().filter_map(|v| v.as_str()) {
897 if s == "!*" {
898 unblock_all = true;
899 break;
900 }
901 if s.contains('*') {
902 block_all = true;
903 }
904 }
905 if unblock_all {
906 self.network_manager.set_block_all(false);
907 } else if block_all {
908 self.network_manager.set_block_all(true);
909 }
910 }
911 }
912 self.queued_events.push_back(TargetEvent::Command(cmd));
913 }
914 TargetMessage::MainFrame(tx) => {
915 let _ = tx.send(self.frame_manager.main_frame().map(|f| f.id().clone()));
916 }
917 TargetMessage::AllFrames(tx) => {
918 let _ = tx.send(
919 self.frame_manager
920 .frames()
921 .map(|f| f.id().clone())
922 .collect(),
923 );
924 }
925 #[cfg(feature = "_cache")]
926 TargetMessage::CacheKey((cache_key, cache_policy)) => {
927 self.network_manager.set_cache_site_key(cache_key);
928 self.network_manager.set_cache_policy(cache_policy);
929 }
930 TargetMessage::Url(req) => {
931 let GetUrl { frame_id, tx } = req;
932 let frame = if let Some(frame_id) = frame_id {
933 self.frame_manager.frame(&frame_id)
934 } else {
935 self.frame_manager.main_frame()
936 };
937 let _ = tx.send(frame.and_then(|f| f.url().map(str::to_string)));
938 }
939 TargetMessage::Name(req) => {
940 let GetName { frame_id, tx } = req;
941 let frame = if let Some(frame_id) = frame_id {
942 self.frame_manager.frame(&frame_id)
943 } else {
944 self.frame_manager.main_frame()
945 };
946 let _ = tx.send(frame.and_then(|f| f.name().map(str::to_string)));
947 }
948 TargetMessage::Parent(req) => {
949 let GetParent { frame_id, tx } = req;
950 let frame = self.frame_manager.frame(&frame_id);
951 let _ = tx.send(frame.and_then(|f| f.parent_id().cloned()));
952 }
953 TargetMessage::WaitForNavigation(tx) => {
954 if let Some(frame) = self.frame_manager.main_frame() {
955 if frame.is_loaded() {
956 let _ = tx.send(frame.http_request().cloned());
957 } else {
958 self.wait_for_frame_navigation.push(tx);
959 }
960 } else {
961 self.wait_for_frame_navigation.push(tx);
962 }
963 }
964 TargetMessage::WaitForDomContentLoaded(tx) => {
965 if let Some(frame) = self.frame_manager.main_frame() {
966 if frame.is_dom_content_loaded() {
967 let _ = tx.send(frame.http_request().cloned());
968 } else {
969 self.wait_for_dom_content_loaded.push(tx);
970 }
971 } else {
972 self.wait_for_dom_content_loaded.push(tx);
973 }
974 }
975 TargetMessage::WaitForNetworkIdle(tx) => {
976 if let Some(frame) = self.frame_manager.main_frame() {
977 if frame.is_network_idle() {
978 let _ = tx.send(frame.http_request().cloned());
979 } else {
980 self.wait_for_network_idle.push(tx);
981 }
982 } else {
983 self.wait_for_network_idle.push(tx);
984 }
985 }
986 TargetMessage::WaitForNetworkAlmostIdle(tx) => {
987 if let Some(frame) = self.frame_manager.main_frame() {
988 if frame.is_network_almost_idle() {
989 let _ = tx.send(frame.http_request().cloned());
990 } else {
991 self.wait_for_network_almost_idle.push(tx);
992 }
993 } else {
994 self.wait_for_network_almost_idle.push(tx);
995 }
996 }
997 TargetMessage::AddEventListener(req) => {
998 if req.method == "Fetch.requestPaused" {
999 self.network_manager.enable_request_intercept();
1000 }
1001 self.event_listeners.add_listener(req);
1002 }
1003 TargetMessage::GetExecutionContext(ctx) => {
1004 let GetExecutionContext {
1005 dom_world,
1006 frame_id,
1007 tx,
1008 } = ctx;
1009 let frame = if let Some(frame_id) = frame_id {
1010 self.frame_manager.frame(&frame_id)
1011 } else {
1012 self.frame_manager.main_frame()
1013 };
1014 if let Some(frame) = frame {
1015 match dom_world {
1016 DOMWorldKind::Main => {
1017 let _ = tx.send(frame.main_world().execution_context());
1018 }
1019 DOMWorldKind::Secondary => {
1020 let _ = tx.send(frame.secondary_world().execution_context());
1021 }
1022 }
1023 } else {
1024 let _ = tx.send(None);
1025 }
1026 }
1027 TargetMessage::Authenticate(credentials) => {
1028 self.network_manager.authenticate(credentials);
1029 }
1030 TargetMessage::BlockNetwork(blocked) => {
1031 self.network_manager.set_block_all(blocked);
1032 }
1033 TargetMessage::EnableInterception(enabled) => {
1034 self.network_manager.user_request_interception_enabled = !enabled;
1035 }
1036 }
1037 }
1038
1039 pub(crate) fn advance(&mut self, now: Instant) -> Option<TargetEvent> {
1044 if !self.is_page() {
1045 return None;
1046 }
1047
1048 match &mut self.init_state {
1050 TargetInit::AttachToTarget => {
1051 self.init_state = TargetInit::InitializingFrame(FrameManager::init_commands(
1052 self.config.request_timeout,
1053 ));
1054 if let Ok(params) = AttachToTargetParams::builder()
1055 .target_id(self.target_id().clone())
1056 .flatten(true)
1057 .build()
1058 {
1059 return Some(TargetEvent::Request(Request::new(
1060 params.identifier(),
1061 serde_json::to_value(params).unwrap_or_default(),
1062 )));
1063 } else {
1064 return None;
1065 }
1066 }
1067 TargetInit::InitializingFrame(cmds) => {
1068 self.session_id.as_ref()?;
1069 if let Poll::Ready(poll) = cmds.poll(now) {
1070 return match poll {
1071 None => {
1072 if let Some(world_name) = self.frame_manager.get_isolated_world_name() {
1073 let world_name = world_name.clone();
1074 if let Some(isolated_world_cmds) =
1075 self.frame_manager.ensure_isolated_world(&world_name)
1076 {
1077 *cmds = isolated_world_cmds;
1078 } else {
1079 self.init_state = TargetInit::InitializingNetwork(
1080 self.network_manager.init_commands(),
1081 );
1082 }
1083 } else {
1084 self.init_state = TargetInit::InitializingNetwork(
1085 self.network_manager.init_commands(),
1086 );
1087 }
1088 self.advance(now)
1089 }
1090 Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
1091 method,
1092 session_id: self.session_id.clone().map(Into::into),
1093 params,
1094 })),
1095 Some(Err(_)) => Some(self.on_initialization_failed()),
1096 };
1097 } else {
1098 return None;
1099 }
1100 }
1101 TargetInit::InitializingNetwork(cmds) => {
1102 if let Poll::Ready(poll) = cmds.poll(now) {
1103 return match poll {
1104 None => {
1105 self.init_state = TargetInit::InitializingPage(
1106 Self::page_init_commands(self.config.request_timeout),
1107 );
1108 self.advance(now)
1109 }
1110 Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
1111 method,
1112 session_id: self.session_id.clone().map(Into::into),
1113 params,
1114 })),
1115 Some(Err(_)) => Some(self.on_initialization_failed()),
1116 };
1117 } else {
1118 return None;
1119 }
1120 }
1121 TargetInit::InitializingPage(cmds) => {
1122 if let Poll::Ready(poll) = cmds.poll(now) {
1123 return match poll {
1124 None => {
1125 self.init_state = match self.config.viewport.as_ref() {
1126 Some(viewport) => TargetInit::InitializingEmulation(
1127 self.emulation_manager.init_commands(viewport),
1128 ),
1129 None => TargetInit::Initialized,
1130 };
1131 self.advance(now)
1132 }
1133 Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
1134 method,
1135 session_id: self.session_id.clone().map(Into::into),
1136 params,
1137 })),
1138 Some(Err(_)) => Some(self.on_initialization_failed()),
1139 };
1140 } else {
1141 return None;
1142 }
1143 }
1144 TargetInit::InitializingEmulation(cmds) => {
1145 if let Poll::Ready(poll) = cmds.poll(now) {
1146 return match poll {
1147 None => {
1148 self.init_state = TargetInit::Initialized;
1149 self.advance(now)
1150 }
1151 Some(Ok((method, params))) => Some(TargetEvent::Request(Request {
1152 method,
1153 session_id: self.session_id.clone().map(Into::into),
1154 params,
1155 })),
1156 Some(Err(_)) => Some(self.on_initialization_failed()),
1157 };
1158 } else {
1159 return None;
1160 }
1161 }
1162 TargetInit::Initialized => {
1163 if let Some(initiator) = self.initiator.take() {
1164 if self
1165 .frame_manager
1166 .main_frame()
1167 .map(|frame| frame.is_loaded())
1168 .unwrap_or_default()
1169 {
1170 if let Some(page) = self.get_or_create_page() {
1171 let _ = initiator.send(Ok(page.clone().into()));
1172 } else {
1173 self.initiator = Some(initiator);
1174 }
1175 } else {
1176 self.initiator = Some(initiator);
1177 }
1178 }
1179 }
1180 TargetInit::Closing => return None,
1181 };
1182
1183 if !self.wait_for_frame_navigation.is_empty() {
1185 self.wait_for_frame_navigation.retain(|tx| !tx.is_closed());
1186 }
1187 if !self.wait_for_dom_content_loaded.is_empty() {
1188 self.wait_for_dom_content_loaded
1189 .retain(|tx| !tx.is_closed());
1190 }
1191 if !self.wait_for_network_idle.is_empty() {
1192 self.wait_for_network_idle.retain(|tx| !tx.is_closed());
1193 }
1194 if !self.wait_for_network_almost_idle.is_empty() {
1195 self.wait_for_network_almost_idle
1196 .retain(|tx| !tx.is_closed());
1197 }
1198
1199 loop {
1201 if self.init_state == TargetInit::Closing {
1202 break None;
1203 }
1204
1205 if let Some(frame) = self.frame_manager.main_frame() {
1206 if frame.is_dom_content_loaded() {
1207 while let Some(tx) = self.wait_for_dom_content_loaded.pop() {
1208 let _ = tx.send(frame.http_request().cloned());
1209 }
1210 }
1211 if frame.is_loaded() {
1212 while let Some(tx) = self.wait_for_frame_navigation.pop() {
1213 let _ = tx.send(frame.http_request().cloned());
1214 }
1215 }
1216 if frame.is_network_idle() {
1217 while let Some(tx) = self.wait_for_network_idle.pop() {
1218 let _ = tx.send(frame.http_request().cloned());
1219 }
1220 }
1221 if frame.is_network_almost_idle() {
1222 while let Some(tx) = self.wait_for_network_almost_idle.pop() {
1223 let _ = tx.send(frame.http_request().cloned());
1224 }
1225 }
1226 }
1227
1228 if let Some(ev) = self.queued_events.pop_front() {
1229 return Some(ev);
1230 }
1231
1232 while let Some(event) = self.network_manager.poll() {
1233 if self.init_state == TargetInit::Closing {
1234 break;
1235 }
1236 match event {
1237 NetworkEvent::SendCdpRequest((method, params)) => {
1238 self.queued_events.push_back(TargetEvent::Request(Request {
1239 method,
1240 session_id: self.session_id.clone().map(Into::into),
1241 params,
1242 }));
1243 }
1244 NetworkEvent::Request(_) => {}
1245 NetworkEvent::Response(_) => {}
1246 NetworkEvent::RequestFailed(request) => {
1247 self.frame_manager.on_http_request_finished(request);
1248 }
1249 NetworkEvent::RequestFinished(request) => {
1250 self.frame_manager.on_http_request_finished(request);
1251 }
1252 NetworkEvent::BytesConsumed(n) => {
1253 self.queued_events.push_back(TargetEvent::BytesConsumed(n));
1254 }
1255 }
1256 }
1257
1258 while let Some(event) = self.frame_manager.poll(now) {
1259 if self.init_state == TargetInit::Closing {
1260 break;
1261 }
1262 match event {
1263 FrameEvent::NavigationResult(res) => {
1264 self.queued_events
1265 .push_back(TargetEvent::NavigationResult(res));
1266 }
1267 FrameEvent::NavigationRequest(id, req) => {
1268 self.queued_events
1269 .push_back(TargetEvent::NavigationRequest(id, req));
1270 }
1271 }
1272 }
1273
1274 if self.queued_events.is_empty() {
1275 return None;
1276 }
1277 }
1278 }
1279
1280 pub fn set_initiator(&mut self, tx: Sender<Result<Page>>) {
1283 self.initiator = Some(tx);
1284 }
1285
1286 pub(crate) fn page_init_commands(timeout: Duration) -> CommandChain {
1287 CommandChain::new(INIT_COMMANDS_PARAMS.clone(), timeout)
1288 }
1289}
1290
/// Settings a target is configured with.
///
/// The initialization state machine reads `request_timeout` and `viewport`
/// from here; where the remaining fields are consumed is not visible in this
/// part of the file, so their descriptions below are marked as assumptions.
#[derive(Debug, Clone)]
pub struct TargetConfig {
    /// NOTE(review): presumably makes the target tolerate HTTPS/certificate
    /// errors — confirm where this flag is consumed.
    pub ignore_https_errors: bool,
    /// Timeout handed to the page-initialization `CommandChain`.
    pub request_timeout: Duration,
    /// When `Some`, an emulation-initialization phase runs after page
    /// initialization using this viewport; when `None` that phase is skipped
    /// and the target goes straight to `Initialized`.
    pub viewport: Option<Viewport>,
    /// NOTE(review): presumably turns on request interception — confirm.
    pub request_intercept: bool,
    /// NOTE(review): presumably toggles the browser cache — confirm.
    pub cache_enabled: bool,
    /// NOTE(review): presumably blocks visual resources such as images —
    /// confirm the exact resource set with the network manager.
    pub ignore_visuals: bool,
    /// NOTE(review): presumably blocks script resources — confirm.
    pub ignore_javascript: bool,
    /// NOTE(review): presumably blocks analytics/tracker requests — confirm.
    pub ignore_analytics: bool,
    /// NOTE(review): presumably blocks prefetch requests — confirm.
    pub ignore_prefetch: bool,
    /// NOTE(review): presumably blocks stylesheet resources — confirm.
    pub ignore_stylesheets: bool,
    /// NOTE(review): presumably restricts loading to HTML documents —
    /// confirm.
    pub only_html: bool,
    /// NOTE(review): presumably toggles service workers — confirm.
    pub service_worker_enabled: bool,
    /// Optional header name/value map.
    /// NOTE(review): presumably sent with every request — confirm.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Interception strategy from `spider_network_blocker`.
    pub intercept_manager: NetworkInterceptManager,
    /// NOTE(review): presumably a cap on bytes the target may consume, with
    /// `None` meaning unlimited — confirm.
    pub max_bytes_allowed: Option<u64>,
    /// NOTE(review): presumably URL patterns that are always allowed —
    /// confirm matching semantics.
    pub whitelist_patterns: Option<Vec<String>>,
    /// NOTE(review): presumably URL patterns that are always blocked —
    /// confirm matching semantics.
    pub blacklist_patterns: Option<Vec<String>>,
    /// Only present when the `adblock` feature is enabled.
    /// NOTE(review): presumably extra adblock filter rules — confirm format.
    #[cfg(feature = "adblock")]
    pub adblock_filter_rules: Option<Vec<String>>,
    /// Optional shared `Notify` handle.
    /// NOTE(review): presumably used to wake whoever drives the page —
    /// confirm who waits on it.
    pub page_wake: Option<Arc<Notify>>,
}
1349
1350impl Default for TargetConfig {
1351 fn default() -> Self {
1352 Self {
1353 ignore_https_errors: true,
1354 request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1355 viewport: Default::default(),
1356 request_intercept: false,
1357 cache_enabled: true,
1358 service_worker_enabled: true,
1359 ignore_javascript: false,
1360 ignore_visuals: false,
1361 ignore_stylesheets: false,
1362 ignore_analytics: true,
1363 ignore_prefetch: true,
1364 only_html: false,
1365 extra_headers: Default::default(),
1366 intercept_manager: NetworkInterceptManager::Unknown,
1367 max_bytes_allowed: None,
1368 whitelist_patterns: None,
1369 blacklist_patterns: None,
1370 #[cfg(feature = "adblock")]
1371 adblock_filter_rules: None,
1372 page_wake: None,
1373 }
1374 }
1375}
1376
/// Kind of a target, derived from its type string.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TargetType {
    /// Type string `"page"`.
    Page,
    /// Type string `"background_page"`.
    BackgroundPage,
    /// Type string `"service_worker"`.
    ServiceWorker,
    /// Type string `"shared_worker"`.
    SharedWorker,
    /// Type string `"other"`.
    Other,
    /// Type string `"browser"`.
    Browser,
    /// Type string `"webview"`.
    Webview,
    /// Any type string not listed above, kept verbatim.
    Unknown(String),
}

impl TargetType {
    /// Maps a type string onto a variant.
    ///
    /// Matching is exact and case-sensitive; any string that is not one of
    /// the known names is preserved in [`TargetType::Unknown`].
    pub fn new(ty: &str) -> Self {
        match ty {
            "page" => Self::Page,
            "background_page" => Self::BackgroundPage,
            "service_worker" => Self::ServiceWorker,
            "shared_worker" => Self::SharedWorker,
            "other" => Self::Other,
            "browser" => Self::Browser,
            "webview" => Self::Webview,
            unrecognized => Self::Unknown(unrecognized.to_owned()),
        }
    }

    /// `true` only for [`TargetType::Page`].
    pub fn is_page(&self) -> bool {
        self == &Self::Page
    }

    /// `true` only for [`TargetType::BackgroundPage`].
    pub fn is_background_page(&self) -> bool {
        self == &Self::BackgroundPage
    }

    /// `true` only for [`TargetType::ServiceWorker`].
    pub fn is_service_worker(&self) -> bool {
        self == &Self::ServiceWorker
    }

    /// `true` only for [`TargetType::SharedWorker`].
    pub fn is_shared_worker(&self) -> bool {
        self == &Self::SharedWorker
    }

    /// `true` only for [`TargetType::Other`].
    pub fn is_other(&self) -> bool {
        self == &Self::Other
    }

    /// `true` only for [`TargetType::Browser`].
    pub fn is_browser(&self) -> bool {
        self == &Self::Browser
    }

    /// `true` only for [`TargetType::Webview`].
    pub fn is_webview(&self) -> bool {
        self == &Self::Webview
    }
}
1431
/// Events a target hands back to its caller when it is polled.
#[derive(Debug)]
pub(crate) enum TargetEvent {
    /// A CDP request to send. Produced for each command yielded by an
    /// initialization command chain and for every
    /// `NetworkEvent::SendCdpRequest` reported by the network manager.
    Request(Request),
    /// Forwarded from `FrameEvent::NavigationRequest`: the navigation id
    /// together with the request that belongs to it.
    NavigationRequest(NavigationId, Request),
    /// Forwarded from `FrameEvent::NavigationResult`: the outcome of a
    /// navigation.
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A command message.
    /// NOTE(review): not produced by the part of the poll loop visible here;
    /// presumably a command received from the page side — confirm.
    Command(CommandMessage),
    /// Forwarded from `NetworkEvent::BytesConsumed`, carrying the `u64`
    /// reported by the network manager.
    BytesConsumed(u64),
}
1445
/// Initialization state of a target.
///
/// The `Initializing*` variants own the command chain that is polled while
/// the target is in that state.
#[derive(Debug, PartialEq)]
pub enum TargetInit {
    /// A command chain for frame setup is being polled.
    /// NOTE(review): the transitions into and out of this state are not
    /// visible in this part of the file — confirm.
    InitializingFrame(CommandChain),
    /// Network setup; when the chain is exhausted the target moves on to
    /// `InitializingPage`.
    InitializingNetwork(CommandChain),
    /// Page setup; when the chain is exhausted the target moves on to
    /// `InitializingEmulation` if a viewport is configured, otherwise to
    /// `Initialized`.
    InitializingPage(CommandChain),
    /// Emulation setup; when the chain is exhausted the target becomes
    /// `Initialized`.
    InitializingEmulation(CommandChain),
    /// NOTE(review): presumably the target is waiting to be attached to a
    /// session — its handling is not visible in this part of the file.
    AttachToTarget,
    /// All initialization chains finished; a pending initiator is answered
    /// once the main frame is loaded and a page handle is available.
    Initialized,
    /// The target is shutting down; polling yields no further events.
    Closing,
}
1457
1458impl TargetInit {
1459 fn commands_mut(&mut self) -> Option<&mut CommandChain> {
1460 match self {
1461 TargetInit::InitializingFrame(cmd) => Some(cmd),
1462 TargetInit::InitializingNetwork(cmd) => Some(cmd),
1463 TargetInit::InitializingPage(cmd) => Some(cmd),
1464 TargetInit::InitializingEmulation(cmd) => Some(cmd),
1465 TargetInit::AttachToTarget => None,
1466 TargetInit::Initialized => None,
1467 TargetInit::Closing => None,
1468 }
1469 }
1470}
1471
/// Request for an execution context id; the answer is delivered over `tx`.
#[derive(Debug)]
pub struct GetExecutionContext {
    /// Which DOM world the context should belong to.
    pub dom_world: DOMWorldKind,
    /// Frame the lookup is scoped to.
    /// NOTE(review): `None` presumably selects the main frame — confirm
    /// against the handler of this request.
    pub frame_id: Option<FrameId>,
    /// Sender the result is written to; `None` is a possible answer.
    /// NOTE(review): presumably `None` means no matching context exists —
    /// confirm.
    pub tx: Sender<Option<ExecutionContextId>>,
}
1481
1482impl GetExecutionContext {
1483 pub fn new(tx: Sender<Option<ExecutionContextId>>) -> Self {
1484 Self {
1485 dom_world: DOMWorldKind::Main,
1486 frame_id: None,
1487 tx,
1488 }
1489 }
1490}
1491
/// Request for a URL; the answer is delivered over `tx`.
#[derive(Debug)]
pub struct GetUrl {
    /// Frame whose URL is wanted.
    /// NOTE(review): `None` presumably selects the main frame — confirm
    /// against the handler of this request.
    pub frame_id: Option<FrameId>,
    /// Sender the result is written to; `None` is a possible answer.
    pub tx: Sender<Option<String>>,
}
1499
1500impl GetUrl {
1501 pub fn new(tx: Sender<Option<String>>) -> Self {
1502 Self { frame_id: None, tx }
1503 }
1504}
1505
/// Request for a frame name; the answer is delivered over `tx`.
#[derive(Debug)]
pub struct GetName {
    /// Frame whose name is wanted.
    /// NOTE(review): `None` presumably selects the main frame — confirm
    /// against the handler of this request.
    pub frame_id: Option<FrameId>,
    /// Sender the result is written to; `None` is a possible answer.
    pub tx: Sender<Option<String>>,
}
1513
/// Request for the parent of a frame; the answer is delivered over `tx`.
#[derive(Debug)]
pub struct GetParent {
    /// Frame whose parent is wanted (required, unlike the other lookups).
    pub frame_id: FrameId,
    /// Sender the result is written to; `None` is a possible answer.
    /// NOTE(review): presumably `None` means the frame has no parent or is
    /// unknown — confirm.
    pub tx: Sender<Option<FrameId>>,
}
1521
/// Messages addressed to a target.
///
/// The code that dispatches these is not visible in this part of the file,
/// so most variant descriptions below are inferred from the payload types
/// and marked for review.
#[derive(Debug)]
pub enum TargetMessage {
    /// Carries a command message.
    /// NOTE(review): presumably a CDP command to execute on this target —
    /// confirm.
    Command(CommandMessage),
    /// Carries a sender for an optional frame id.
    /// NOTE(review): presumably answered with the main frame's id — confirm.
    MainFrame(Sender<Option<FrameId>>),
    /// Carries a sender for a list of frame ids.
    /// NOTE(review): presumably answered with every known frame — confirm.
    AllFrames(Sender<Vec<FrameId>>),
    /// Only present when the `_cache` feature is enabled. Carries an
    /// optional string together with an optional cache policy.
    /// NOTE(review): presumably the cache key and policy to use — confirm.
    #[cfg(feature = "_cache")]
    CacheKey((Option<String>, Option<crate::cache::BasicCachePolicy>)),
    /// URL lookup; see `GetUrl`.
    Url(GetUrl),
    /// Frame name lookup; see `GetName`.
    Name(GetName),
    /// Parent frame lookup; see `GetParent`.
    Parent(GetParent),
    /// Carries a sender for an `ArcHttpRequest`.
    /// NOTE(review): presumably queued until the main frame is loaded —
    /// confirm.
    WaitForNavigation(Sender<ArcHttpRequest>),
    /// Carries a sender for an `ArcHttpRequest`.
    /// NOTE(review): presumably queued until the main frame reports DOM
    /// content loaded — confirm.
    WaitForDomContentLoaded(Sender<ArcHttpRequest>),
    /// Carries a sender for an `ArcHttpRequest`.
    /// NOTE(review): presumably queued until the main frame reports network
    /// idle — confirm.
    WaitForNetworkIdle(Sender<ArcHttpRequest>),
    /// Carries a sender for an `ArcHttpRequest`.
    /// NOTE(review): presumably queued until the main frame reports network
    /// almost idle — confirm.
    WaitForNetworkAlmostIdle(Sender<ArcHttpRequest>),
    /// Carries an event listener request.
    /// NOTE(review): presumably registers the listener with this target —
    /// confirm.
    AddEventListener(EventListenerRequest),
    /// Execution context lookup; see `GetExecutionContext`.
    GetExecutionContext(GetExecutionContext),
    /// Carries credentials.
    /// NOTE(review): presumably used to answer authentication challenges —
    /// confirm.
    Authenticate(Credentials),
    /// Carries a boolean.
    /// NOTE(review): presumably turns network blocking on or off — confirm.
    BlockNetwork(bool),
    /// Carries a boolean.
    /// NOTE(review): presumably turns request interception on or off —
    /// confirm.
    EnableInterception(bool),
}