1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10 AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11 EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12 EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17 browser_protocol::page::{self, FrameId},
18 js_protocol::runtime,
19};
20use chromiumoxide_types::{Method, MethodId, Request};
21
22use crate::error::DeadlineExceeded;
23use crate::handler::domworld::DOMWorld;
24use crate::handler::http::HttpRequest;
25use crate::handler::REQUEST_TIMEOUT;
26use crate::{cmd::CommandChain, ArcHttpRequest};
27
/// Name of the execution context that is stored as a frame's secondary
/// ("utility") world when such a context is reported for the frame.
pub const UTILITY_WORLD_NAME: &str = "__chromiumoxide_utility_world__";
/// Value used for the `sourceURL` annotation of the script that is registered
/// when an isolated world is ensured.
const EVALUATION_SCRIPT_URL: &str = "____chromiumoxide_utility_world___evaluation_script__";
30
31lazy_static::lazy_static! {
32 static ref CHROME_SPOOF_RUNTIME: bool = {
34 std::env::var("CHROME_SPOOF_RUNTIME").unwrap_or_else(|_| "false".to_string()) == "true"
35 };
36}
37
/// State tracked for a single frame of a page.
#[derive(Debug)]
pub struct Frame {
    /// Id of the parent frame; `None` when the frame was created without a parent.
    parent_frame: Option<FrameId>,
    /// Id of this frame.
    id: FrameId,
    /// World that receives the frame's default execution context.
    main_world: DOMWorld,
    /// World that receives the execution context named [`UTILITY_WORLD_NAME`].
    secondary_world: DOMWorld,
    /// Loader id last recorded for this frame (set from the `init` lifecycle event).
    loader_id: Option<LoaderId>,
    /// Last known url of the frame, if any.
    url: Option<String>,
    /// Most recently finished http request of this frame; cleared when loading starts.
    http_request: ArcHttpRequest,
    /// Ids of the frames that were attached with this frame as their parent.
    child_frames: HashSet<FrameId>,
    /// Name of the frame as reported on navigation.
    name: Option<String>,
    /// Names of the lifecycle events seen since they were last cleared.
    lifecycle_events: HashSet<MethodId>,
}
57
58impl Frame {
59 pub fn new(id: FrameId) -> Self {
60 Self {
61 parent_frame: None,
62 id,
63 main_world: Default::default(),
64 secondary_world: Default::default(),
65 loader_id: None,
66 url: None,
67 http_request: None,
68 child_frames: Default::default(),
69 name: None,
70 lifecycle_events: Default::default(),
71 }
72 }
73
74 pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
75 parent.child_frames.insert(id.clone());
76 Self {
77 parent_frame: Some(parent.id.clone()),
78 id,
79 main_world: Default::default(),
80 secondary_world: Default::default(),
81 loader_id: None,
82 url: None,
83 http_request: None,
84 child_frames: Default::default(),
85 name: None,
86 lifecycle_events: Default::default(),
87 }
88 }
89
90 pub fn parent_id(&self) -> Option<&FrameId> {
91 self.parent_frame.as_ref()
92 }
93
94 pub fn id(&self) -> &FrameId {
95 &self.id
96 }
97
98 pub fn url(&self) -> Option<&str> {
99 self.url.as_deref()
100 }
101
102 pub fn name(&self) -> Option<&str> {
103 self.name.as_deref()
104 }
105
106 pub fn main_world(&self) -> &DOMWorld {
107 &self.main_world
108 }
109
110 pub fn secondary_world(&self) -> &DOMWorld {
111 &self.secondary_world
112 }
113
114 pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
115 &self.lifecycle_events
116 }
117
118 pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
119 self.http_request.as_ref()
120 }
121
122 fn navigated(&mut self, frame: &CdpFrame) {
123 self.name.clone_from(&frame.name);
124 let url = if let Some(ref fragment) = frame.url_fragment {
125 format!("{}{fragment}", frame.url)
126 } else {
127 frame.url.clone()
128 };
129 self.url = Some(url);
130 }
131
132 fn navigated_within_url(&mut self, url: String) {
133 self.url = Some(url)
134 }
135
136 fn on_loading_stopped(&mut self) {
137 self.lifecycle_events.insert("DOMContentLoaded".into());
138 self.lifecycle_events.insert("load".into());
139 }
140
141 fn on_loading_started(&mut self) {
142 self.lifecycle_events.clear();
143 self.http_request.take();
144 }
145
146 pub fn is_loaded(&self) -> bool {
147 self.lifecycle_events.contains("load")
148 }
149
150 pub fn clear_contexts(&mut self) {
151 self.main_world.take_context();
152 self.secondary_world.take_context();
153 }
154
155 pub fn destroy_context(&mut self, ctx_unique_id: &str) {
156 if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
157 self.main_world.take_context();
158 } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
159 self.secondary_world.take_context();
160 }
161 }
162
163 pub fn execution_context(&self) -> Option<ExecutionContextId> {
164 self.main_world.execution_context()
165 }
166
167 pub fn set_request(&mut self, request: HttpRequest) {
168 self.http_request = Some(Arc::new(request))
169 }
170}
171
172impl From<CdpFrame> for Frame {
173 fn from(frame: CdpFrame) -> Self {
174 Self {
175 parent_frame: frame.parent_id.map(From::from),
176 id: frame.id,
177 main_world: Default::default(),
178 secondary_world: Default::default(),
179 loader_id: Some(frame.loader_id),
180 url: Some(frame.url),
181 http_request: None,
182 child_frames: Default::default(),
183 name: frame.name,
184 lifecycle_events: Default::default(),
185 }
186 }
187}
188
/// Keeps track of all frames of a page, their execution contexts and the
/// navigations that were requested for them.
#[derive(Debug)]
pub struct FrameManager {
    /// Id of the frame currently treated as the main frame.
    main_frame: Option<FrameId>,
    /// All tracked frames, keyed by their id.
    frames: HashMap<FrameId, Frame>,
    /// Maps the unique id of an execution context to the frame it was created for.
    context_ids: HashMap<String, FrameId>,
    /// Names of the isolated worlds that were already reported or requested.
    isolated_worlds: HashSet<String>,
    /// Timeout handed to the command chain that creates isolated worlds.
    request_timeout: Duration,
    /// Navigations that were requested but not started yet, each with the
    /// watcher that will track it.
    pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
    /// The navigation currently in flight together with its deadline.
    navigation: Option<(NavigationWatcher, Instant)>,
}
207
208impl FrameManager {
209 pub fn new(request_timeout: Duration) -> Self {
210 FrameManager {
211 main_frame: None,
212 frames: Default::default(),
213 context_ids: Default::default(),
214 isolated_worlds: Default::default(),
215 request_timeout,
216 pending_navigations: Default::default(),
217 navigation: None,
218 }
219 }
220
221 pub fn init_commands(timeout: Duration) -> CommandChain {
223 let enable = page::EnableParams::default();
224 let get_tree = page::GetFrameTreeParams::default();
225 let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
226 let enable_runtime = runtime::EnableParams::default();
227 let disable_runtime = runtime::DisableParams::default();
228
229 let mut commands = Vec::with_capacity(if *CHROME_SPOOF_RUNTIME { 5 } else { 4 });
230
231 let enable_id = enable.identifier();
232 let get_tree_id = get_tree.identifier();
233 let set_lifecycle_id = set_lifecycle.identifier();
234 let enable_runtime_id = enable_runtime.identifier();
235 let disable_runtime_id = disable_runtime.identifier();
236
237 if let Ok(value) = serde_json::to_value(enable) {
238 commands.push((enable_id, value));
239 }
240
241 if let Ok(value) = serde_json::to_value(get_tree) {
242 commands.push((get_tree_id, value));
243 }
244
245 if let Ok(value) = serde_json::to_value(set_lifecycle) {
246 commands.push((set_lifecycle_id, value));
247 }
248
249 if let Ok(value) = serde_json::to_value(enable_runtime) {
250 commands.push((enable_runtime_id, value));
251 }
252
253 if *CHROME_SPOOF_RUNTIME {
254 if let Ok(value) = serde_json::to_value(disable_runtime) {
255 commands.push((disable_runtime_id, value));
256 }
257 }
258
259 CommandChain::new(commands, timeout)
260 }
261
262 pub fn main_frame(&self) -> Option<&Frame> {
263 self.main_frame.as_ref().and_then(|id| self.frames.get(id))
264 }
265
266 pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
267 if let Some(id) = self.main_frame.as_ref() {
268 self.frames.get_mut(id)
269 } else {
270 None
271 }
272 }
273
/// Iterates over all tracked frames. The frames live in a `HashMap`, so the
/// iteration order is unspecified.
pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
    self.frames.values()
}
277
/// Looks up the tracked frame with the given id.
pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
    self.frames.get(id)
}
281
282 fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
283 watcher.expected_lifecycle.iter().all(|ev| {
284 frame.lifecycle_events.contains(ev)
285 || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
286 }) && frame
287 .child_frames
288 .iter()
289 .filter_map(|f| self.frames.get(f))
290 .all(|f| self.check_lifecycle(watcher, f))
291 }
292
293 fn check_lifecycle_complete(
294 &self,
295 watcher: &NavigationWatcher,
296 frame: &Frame,
297 ) -> Option<NavigationOk> {
298 if !self.check_lifecycle(watcher, frame) {
299 return None;
300 }
301 if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
302 return None;
303 }
304 if watcher.same_document_navigation {
305 return Some(NavigationOk::SameDocumentNavigation(watcher.id));
306 }
307 if frame.loader_id != watcher.loader_id {
308 return Some(NavigationOk::NewDocumentNavigation(watcher.id));
309 }
310 None
311 }
312
313 pub fn on_http_request_finished(&mut self, request: HttpRequest) {
315 if let Some(id) = request.frame.as_ref() {
316 if let Some(frame) = self.frames.get_mut(id) {
317 frame.set_request(request);
318 }
319 }
320 }
321
322 pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
323 if let Some((watcher, deadline)) = self.navigation.take() {
325 if now > deadline {
326 return Some(FrameEvent::NavigationResult(Err(
328 NavigationError::Timeout {
329 err: DeadlineExceeded::new(now, deadline),
330 id: watcher.id,
331 },
332 )));
333 }
334
335 if let Some(frame) = self.frames.get(&watcher.frame_id) {
336 if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
337 return Some(FrameEvent::NavigationResult(Ok(nav)));
340 } else {
341 self.navigation = Some((watcher, deadline));
343 }
344 } else {
345 return Some(FrameEvent::NavigationResult(Err(
346 NavigationError::FrameNotFound {
347 frame: watcher.frame_id,
348 id: watcher.id,
349 },
350 )));
351 }
352 } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
353 let deadline = Instant::now() + req.timeout;
355 self.navigation = Some((watcher, deadline));
356 return Some(FrameEvent::NavigationRequest(req.id, req.req));
357 }
358 None
359 }
360
361 pub fn goto(&mut self, req: FrameRequestedNavigation) {
363 if let Some(frame_id) = &self.main_frame {
364 self.navigate_frame(frame_id.clone(), req);
365 }
366 }
367
368 pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
370 let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
371 let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
372
373 req.set_frame_id(frame_id);
375
376 self.pending_navigations.push_back((req, watcher))
377 }
378
/// Hook for `EventAttachedToTarget`; currently does nothing.
pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
}
383
384 pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
385 self.on_frame_attached(
386 frame_tree.frame.id.clone(),
387 frame_tree.frame.parent_id.clone().map(Into::into),
388 );
389 self.on_frame_navigated(&frame_tree.frame);
390 if let Some(children) = frame_tree.child_frames {
391 for child_tree in children {
392 self.on_frame_tree(child_tree);
393 }
394 }
395 }
396
397 pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
398 if self.frames.contains_key(&frame_id) {
399 return;
400 }
401 if let Some(parent_frame_id) = parent_frame_id {
402 if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
403 let frame = Frame::with_parent(frame_id.clone(), parent_frame);
404 self.frames.insert(frame_id, frame);
405 }
406 }
407 }
408
/// Stops tracking the detached frame and all of its descendants.
pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
    self.remove_frames_recursively(&event.frame_id);
}
412
413 pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
414 if frame.parent_id.is_some() {
415 if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
416 for child in f.child_frames.drain() {
417 self.remove_frames_recursively(&child);
418 }
419 f.navigated(frame);
420 self.frames.insert(id, f);
421 }
422 } else {
423 let mut f = if let Some(main) = self.main_frame.take() {
424 if let Some(mut main_frame) = self.frames.remove(&main) {
426 for child in &main_frame.child_frames {
427 self.remove_frames_recursively(child);
428 }
429 main_frame.child_frames.clear();
431 main_frame.id = frame.id.clone();
432 main_frame
433 } else {
434 Frame::new(frame.id.clone())
435 }
436 } else {
437 Frame::new(frame.id.clone())
439 };
440 f.navigated(frame);
441 self.main_frame = Some(f.id.clone());
442 self.frames.insert(f.id.clone(), f);
443 }
444 }
445
446 pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
447 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
448 frame.navigated_within_url(event.url.clone());
449 }
450 if let Some((watcher, _)) = self.navigation.as_mut() {
451 watcher.on_frame_navigated_within_document(event);
452 }
453 }
454
/// Notifies the frame named in the event that it stopped loading; events
/// for frames that are not tracked are ignored.
pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
    if let Some(frame) = self.frames.get_mut(&event.frame_id) {
        frame.on_loading_stopped();
    }
}
460
/// Notifies the frame named in the event that it started loading; events
/// for frames that are not tracked are ignored.
pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
    if let Some(frame) = self.frames.get_mut(&event.frame_id) {
        frame.on_loading_started();
    }
}
467
/// Hook for `EventBindingCalled`; currently does nothing.
pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
470
471 pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
473 if let Some(frame_id) = event
474 .context
475 .aux_data
476 .as_ref()
477 .and_then(|v| v["frameId"].as_str())
478 {
479 if let Some(frame) = self.frames.get_mut(frame_id) {
480 if event
481 .context
482 .aux_data
483 .as_ref()
484 .and_then(|v| v["isDefault"].as_bool())
485 .unwrap_or_default()
486 {
487 frame
488 .main_world
489 .set_context(event.context.id, event.context.unique_id.clone());
490 } else if event.context.name == UTILITY_WORLD_NAME
491 && frame.secondary_world.execution_context().is_none()
492 {
493 frame
494 .secondary_world
495 .set_context(event.context.id, event.context.unique_id.clone());
496 }
497 self.context_ids
498 .insert(event.context.unique_id.clone(), frame.id.clone());
499 }
500 }
501 if event
502 .context
503 .aux_data
504 .as_ref()
505 .filter(|v| v["type"].as_str() == Some("isolated"))
506 .is_some()
507 {
508 self.isolated_worlds.insert(event.context.name.clone());
509 }
510 }
511
512 pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
514 if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
515 if let Some(frame) = self.frames.get_mut(&id) {
516 frame.destroy_context(&event.execution_context_unique_id);
517 }
518 }
519 }
520
521 pub fn on_execution_contexts_cleared(&mut self) {
523 for id in self.context_ids.values() {
524 if let Some(frame) = self.frames.get_mut(id) {
525 frame.clear_contexts();
526 }
527 }
528 self.context_ids.clear()
529 }
530
531 pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
533 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
534 if event.name == "init" {
535 frame.loader_id = Some(event.loader_id.clone());
536 frame.lifecycle_events.clear();
537 }
538 frame.lifecycle_events.insert(event.name.clone().into());
539 }
540 }
541
542 fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
544 if let Some(mut frame) = self.frames.remove(id) {
545 for child in &frame.child_frames {
546 self.remove_frames_recursively(child);
547 }
548 if let Some(parent_id) = frame.parent_frame.take() {
549 if let Some(parent) = self.frames.get_mut(&parent_id) {
550 parent.child_frames.remove(&frame.id);
551 }
552 }
553 Some(frame)
554 } else {
555 None
556 }
557 }
558
559 pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
560 if self.isolated_worlds.contains(world_name) {
561 return None;
562 }
563
564 self.isolated_worlds.insert(world_name.to_string());
565
566 let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
567 .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
568 .world_name(world_name)
569 .build()
570 .unwrap();
571
572 let mut cmds = Vec::with_capacity(self.frames.len() + 1);
573
574 cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
575
576 let cm = self.frames.keys().filter_map(|id| {
577 if let Ok(cmd) = CreateIsolatedWorldParams::builder()
578 .frame_id(id.clone())
579 .grant_univeral_access(true)
580 .world_name(world_name)
581 .build()
582 {
583 let cm = (
584 cmd.identifier(),
585 serde_json::to_value(cmd).unwrap_or_default(),
586 );
587
588 Some(cm)
589 } else {
590 None
591 }
592 });
593
594 cmds.extend(cm);
595
596 Some(CommandChain::new(cmds, self.request_timeout))
597 }
598}
599
/// Events produced by [`FrameManager::poll`].
#[derive(Debug)]
pub enum FrameEvent {
    /// The navigation in flight finished, either successfully or with an error.
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A queued navigation was started; carries its id and the request that
    /// belongs to it.
    NavigationRequest(NavigationId, Request),
}
609
/// Reasons why a watched navigation did not complete.
#[derive(Debug)]
pub enum NavigationError {
    /// The deadline of the navigation passed before it completed.
    Timeout {
        /// Id of the navigation that timed out.
        id: NavigationId,
        /// Details about the exceeded deadline.
        err: DeadlineExceeded,
    },
    /// The frame that was being navigated is no longer tracked.
    FrameNotFound {
        /// Id of the affected navigation.
        id: NavigationId,
        /// Id of the frame that could not be found.
        frame: FrameId,
    },
}
621
622impl NavigationError {
623 pub fn navigation_id(&self) -> &NavigationId {
624 match self {
625 NavigationError::Timeout { id, .. } => id,
626 NavigationError::FrameNotFound { id, .. } => id,
627 }
628 }
629}
630
/// Ways in which a watched navigation can complete successfully.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum NavigationOk {
    /// The navigation stayed within the current document.
    SameDocumentNavigation(NavigationId),
    /// The navigation loaded a new document (the frame's loader id changed).
    NewDocumentNavigation(NavigationId),
}
636
637impl NavigationOk {
638 pub fn navigation_id(&self) -> &NavigationId {
639 match self {
640 NavigationOk::SameDocumentNavigation(id) => id,
641 NavigationOk::NewDocumentNavigation(id) => id,
642 }
643 }
644}
645
/// Tracks the progress of a single navigation.
#[derive(Debug)]
pub struct NavigationWatcher {
    /// Id of the watched navigation.
    id: NavigationId,
    /// Names of the lifecycle events the frame has to reach.
    expected_lifecycle: HashSet<MethodId>,
    /// Id of the frame that is being navigated.
    frame_id: FrameId,
    /// Loader id handed to the watcher when it was created; a different
    /// loader id on the frame indicates that a new document was loaded.
    loader_id: Option<LoaderId>,
    /// Set once a navigation within the document was observed for the frame.
    same_document_navigation: bool,
}
659
660impl NavigationWatcher {
661 pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
662 Self {
663 id,
664 expected_lifecycle: std::iter::once("load".into()).collect(),
665 loader_id,
666 frame_id: frame,
667 same_document_navigation: false,
668 }
669 }
670
671 pub fn is_lifecycle_complete(&self) -> bool {
673 self.expected_lifecycle.is_empty()
674 }
675
676 fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
677 if self.frame_id == ev.frame_id {
678 self.same_document_navigation = true;
679 }
680 }
681}
682
/// Identifier of a requested navigation.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
686
/// A navigation that was requested for a frame.
#[derive(Debug)]
pub struct FrameRequestedNavigation {
    /// Id of the navigation.
    pub id: NavigationId,
    /// The request that belongs to the navigation.
    pub req: Request,
    /// How long the navigation may take once it was started.
    pub timeout: Duration,
}
697
698impl FrameRequestedNavigation {
699 pub fn new(id: NavigationId, req: Request) -> Self {
700 Self {
701 id,
702 req,
703 timeout: Duration::from_millis(REQUEST_TIMEOUT),
704 }
705 }
706
707 pub fn set_frame_id(&mut self, frame_id: FrameId) {
709 if let Some(params) = self.req.params.as_object_mut() {
710 if let Entry::Vacant(entry) = params.entry("frameId") {
711 entry.insert(serde_json::Value::String(frame_id.into()));
712 }
713 }
714 }
715}
716
/// Lifecycle events of a page; [`LifecycleEvent::Load`] is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// Rendered as `load`.
    #[default]
    Load,
    /// Rendered as `DOMContentLoaded`.
    DomcontentLoaded,
    /// Rendered as `networkIdle`.
    NetworkIdle,
    /// Rendered as `networkAlmostIdle`.
    NetworkAlmostIdle,
}

impl AsRef<str> for LifecycleEvent {
    /// Returns the event name as a string.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}