1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{self, FrameId};
10use chromiumoxide_cdp::cdp::browser_protocol::page::{
11 AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
12 EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
13 EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
14};
15use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
16use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
17use chromiumoxide_types::{Method, MethodId, Request};
18
19use crate::error::DeadlineExceeded;
20use crate::handler::domworld::DOMWorld;
21use crate::handler::http::HttpRequest;
22use crate::handler::REQUEST_TIMEOUT;
23use crate::{cmd::CommandChain, ArcHttpRequest};
24
/// Name of the utility world; an execution context carrying this name is
/// stored as a frame's secondary world.
pub const UTILITY_WORLD_NAME: &str = "util";
/// Value used for the `//# sourceURL=` marker of the script that is registered
/// when an isolated world is set up.
const EVALUATION_SCRIPT_URL: &str = "app.js";
27
28#[derive(Debug)]
30pub struct Frame {
31 parent_frame: Option<FrameId>,
32 id: FrameId,
34 main_world: DOMWorld,
35 secondary_world: DOMWorld,
36 loader_id: Option<LoaderId>,
37 url: Option<String>,
39 http_request: ArcHttpRequest,
41 child_frames: HashSet<FrameId>,
43 name: Option<String>,
44 lifecycle_events: HashSet<MethodId>,
46}
47
48impl Frame {
49 pub fn new(id: FrameId) -> Self {
50 Self {
51 parent_frame: None,
52 id,
53 main_world: Default::default(),
54 secondary_world: Default::default(),
55 loader_id: None,
56 url: None,
57 http_request: None,
58 child_frames: Default::default(),
59 name: None,
60 lifecycle_events: Default::default(),
61 }
62 }
63
64 pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
65 parent.child_frames.insert(id.clone());
66 Self {
67 parent_frame: Some(parent.id.clone()),
68 id,
69 main_world: Default::default(),
70 secondary_world: Default::default(),
71 loader_id: None,
72 url: None,
73 http_request: None,
74 child_frames: Default::default(),
75 name: None,
76 lifecycle_events: Default::default(),
77 }
78 }
79
80 pub fn parent_id(&self) -> Option<&FrameId> {
81 self.parent_frame.as_ref()
82 }
83
84 pub fn id(&self) -> &FrameId {
85 &self.id
86 }
87
88 pub fn url(&self) -> Option<&str> {
89 self.url.as_deref()
90 }
91
92 pub fn name(&self) -> Option<&str> {
93 self.name.as_deref()
94 }
95
96 pub fn main_world(&self) -> &DOMWorld {
97 &self.main_world
98 }
99
100 pub fn secondary_world(&self) -> &DOMWorld {
101 &self.secondary_world
102 }
103
104 pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
105 &self.lifecycle_events
106 }
107
108 pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
109 self.http_request.as_ref()
110 }
111
112 fn navigated(&mut self, frame: &CdpFrame) {
113 self.name.clone_from(&frame.name);
114 let url = if let Some(ref fragment) = frame.url_fragment {
115 format!("{}{fragment}", frame.url)
116 } else {
117 frame.url.clone()
118 };
119 self.url = Some(url);
120 }
121
122 fn navigated_within_url(&mut self, url: String) {
123 self.url = Some(url)
124 }
125
126 fn on_loading_stopped(&mut self) {
127 self.lifecycle_events.insert("DOMContentLoaded".into());
128 self.lifecycle_events.insert("load".into());
129 }
130
131 fn on_loading_started(&mut self) {
132 self.lifecycle_events.clear();
133 self.http_request.take();
134 }
135
136 pub fn is_loaded(&self) -> bool {
137 self.lifecycle_events.contains("load")
138 }
139
140 pub fn clear_contexts(&mut self) {
141 self.main_world.take_context();
142 self.secondary_world.take_context();
143 }
144
145 pub fn destroy_context(&mut self, ctx_unique_id: &str) {
146 if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
147 self.main_world.take_context();
148 } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
149 self.secondary_world.take_context();
150 }
151 }
152
153 pub fn execution_context(&self) -> Option<ExecutionContextId> {
154 self.main_world.execution_context()
155 }
156
157 pub fn set_request(&mut self, request: HttpRequest) {
158 self.http_request = Some(Arc::new(request))
159 }
160}
161
162impl From<CdpFrame> for Frame {
163 fn from(frame: CdpFrame) -> Self {
164 Self {
165 parent_frame: frame.parent_id,
166 id: frame.id,
167 main_world: Default::default(),
168 secondary_world: Default::default(),
169 loader_id: Some(frame.loader_id),
170 url: Some(frame.url),
171 http_request: None,
172 child_frames: Default::default(),
173 name: frame.name,
174 lifecycle_events: Default::default(),
175 }
176 }
177}
178
179#[derive(Debug)]
183pub struct FrameManager {
184 main_frame: Option<FrameId>,
185 frames: HashMap<FrameId, Frame>,
186 context_ids: HashMap<String, FrameId>,
188 isolated_worlds: HashSet<String>,
189 request_timeout: Duration,
192 pending_navigations: VecDeque<(FrameNavigationRequest, NavigationWatcher)>,
194 navigation: Option<(NavigationWatcher, Instant)>,
196}
197
198impl FrameManager {
199 pub fn new(request_timeout: Duration) -> Self {
200 FrameManager {
201 main_frame: None,
202 frames: Default::default(),
203 context_ids: Default::default(),
204 isolated_worlds: Default::default(),
205 request_timeout,
206 pending_navigations: Default::default(),
207 navigation: None,
208 }
209 }
210
211 pub fn init_commands(timeout: Duration) -> CommandChain {
213 let enable = page::EnableParams::default();
214 let get_tree = page::GetFrameTreeParams::default();
215 let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
216
217 CommandChain::new(
221 vec![
222 (enable.identifier(), serde_json::to_value(enable).unwrap()),
223 (
224 get_tree.identifier(),
225 serde_json::to_value(get_tree).unwrap(),
226 ),
227 (
228 set_lifecycle.identifier(),
229 serde_json::to_value(set_lifecycle).unwrap(),
230 ),
231 ],
232 timeout,
233 )
234 }
235
236 pub fn main_frame(&self) -> Option<&Frame> {
237 self.main_frame.as_ref().and_then(|id| self.frames.get(id))
238 }
239
240 pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
241 if let Some(id) = self.main_frame.as_ref() {
242 self.frames.get_mut(id)
243 } else {
244 None
245 }
246 }
247
248 pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
249 self.frames.values()
250 }
251
252 pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
253 self.frames.get(id)
254 }
255
256 fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
257 watcher.expected_lifecycle.iter().all(|ev| {
258 frame.lifecycle_events.contains(ev)
259 || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
260 }) && frame
261 .child_frames
262 .iter()
263 .filter_map(|f| self.frames.get(f))
264 .all(|f| self.check_lifecycle(watcher, f))
265 }
266
267 fn check_lifecycle_complete(
268 &self,
269 watcher: &NavigationWatcher,
270 frame: &Frame,
271 ) -> Option<NavigationOk> {
272 if !self.check_lifecycle(watcher, frame) {
273 return None;
274 }
275 if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
276 return None;
277 }
278 if watcher.same_document_navigation {
279 return Some(NavigationOk::SameDocumentNavigation(watcher.id));
280 }
281 if frame.loader_id != watcher.loader_id {
282 return Some(NavigationOk::NewDocumentNavigation(watcher.id));
283 }
284 None
285 }
286
287 pub fn on_http_request_finished(&mut self, request: HttpRequest) {
289 if let Some(id) = request.frame.as_ref() {
290 if let Some(frame) = self.frames.get_mut(id) {
291 frame.set_request(request);
292 }
293 }
294 }
295
296 pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
297 if let Some((watcher, deadline)) = self.navigation.take() {
299 if now > deadline {
300 return Some(FrameEvent::NavigationResult(Err(
302 NavigationError::Timeout {
303 err: DeadlineExceeded::new(now, deadline),
304 id: watcher.id,
305 },
306 )));
307 }
308 if let Some(frame) = self.frames.get(&watcher.frame_id) {
309 if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
310 return Some(FrameEvent::NavigationResult(Ok(nav)));
313 } else {
314 self.navigation = Some((watcher, deadline));
316 }
317 } else {
318 return Some(FrameEvent::NavigationResult(Err(
319 NavigationError::FrameNotFound {
320 frame: watcher.frame_id,
321 id: watcher.id,
322 },
323 )));
324 }
325 } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
326 let deadline = Instant::now() + req.timeout;
328 self.navigation = Some((watcher, deadline));
329 return Some(FrameEvent::NavigationRequest(req.id, req.req));
330 }
331 None
332 }
333
334 pub fn goto(&mut self, req: FrameNavigationRequest) {
336 if let Some(frame_id) = self.main_frame.clone() {
337 self.navigate_frame(frame_id, req);
338 }
339 }
340
341 pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameNavigationRequest) {
343 let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
344 let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
345 req.set_frame_id(frame_id);
347 self.pending_navigations.push_back((req, watcher))
348 }
349
350 pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
352 }
354
355 pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
356 self.on_frame_attached(
357 frame_tree.frame.id.clone(),
358 frame_tree.frame.parent_id.clone(),
359 );
360 self.on_frame_navigated(&frame_tree.frame);
361 if let Some(children) = frame_tree.child_frames {
362 for child_tree in children {
363 self.on_frame_tree(child_tree);
364 }
365 }
366 }
367
368 pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
369 if self.frames.contains_key(&frame_id) {
370 return;
371 }
372 if let Some(parent_frame_id) = parent_frame_id {
373 if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
374 let frame = Frame::with_parent(frame_id.clone(), parent_frame);
375 self.frames.insert(frame_id, frame);
376 }
377 }
378 }
379
380 pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
381 self.remove_frames_recursively(&event.frame_id);
382 }
383
384 pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
385 if frame.parent_id.is_some() {
386 if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
387 for child in &f.child_frames {
388 self.remove_frames_recursively(child);
389 }
390 f.child_frames.clear();
392 f.navigated(frame);
393 self.frames.insert(id, f);
394 }
395 } else {
396 let mut f = if let Some(main) = self.main_frame.take() {
397 let mut main_frame = self.frames.remove(&main).expect("Main frame is tracked.");
399 for child in &main_frame.child_frames {
400 self.remove_frames_recursively(child);
401 }
402 main_frame.child_frames.clear();
404 main_frame.id = frame.id.clone();
405 main_frame
406 } else {
407 Frame::new(frame.id.clone())
409 };
410 f.navigated(frame);
411 self.main_frame = Some(f.id.clone());
412 self.frames.insert(f.id.clone(), f);
413 }
414 }
415
416 pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
417 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
418 frame.navigated_within_url(event.url.clone());
419 }
420 if let Some((watcher, _)) = self.navigation.as_mut() {
421 watcher.on_frame_navigated_within_document(event);
422 }
423 }
424
425 pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
426 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
427 frame.on_loading_stopped();
428 }
429 }
430
431 pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
433 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
434 frame.on_loading_started();
435 }
436 }
437
438 pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
440
441 pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
443 if let Some(frame_id) = event
444 .context
445 .aux_data
446 .as_ref()
447 .and_then(|v| v["frameId"].as_str())
448 {
449 if let Some(frame) = self.frames.get_mut(frame_id) {
450 if event
451 .context
452 .aux_data
453 .as_ref()
454 .and_then(|v| v["isDefault"].as_bool())
455 .unwrap_or_default()
456 {
457 frame
458 .main_world
459 .set_context(event.context.id, event.context.unique_id.clone());
460 } else if event.context.name == UTILITY_WORLD_NAME
461 && frame.secondary_world.execution_context().is_none()
462 {
463 frame
464 .secondary_world
465 .set_context(event.context.id, event.context.unique_id.clone());
466 }
467 self.context_ids
468 .insert(event.context.unique_id.clone(), frame.id.clone());
469 }
470 }
471 if event
472 .context
473 .aux_data
474 .as_ref()
475 .filter(|v| v["type"].as_str() == Some("isolated"))
476 .is_some()
477 {
478 self.isolated_worlds.insert(event.context.name.clone());
479 }
480 }
481
482 pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
484 if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
485 if let Some(frame) = self.frames.get_mut(&id) {
486 frame.destroy_context(&event.execution_context_unique_id);
487 }
488 }
489 }
490
491 pub fn on_execution_contexts_cleared(&mut self) {
493 for id in self.context_ids.values() {
494 if let Some(frame) = self.frames.get_mut(id) {
495 frame.clear_contexts();
496 }
497 }
498 self.context_ids.clear()
499 }
500
501 pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
503 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
504 if event.name == "init" {
505 frame.loader_id = Some(event.loader_id.clone());
506 frame.lifecycle_events.clear();
507 }
508 frame.lifecycle_events.insert(event.name.clone().into());
509 }
510 }
511
512 fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
514 if let Some(mut frame) = self.frames.remove(id) {
515 for child in &frame.child_frames {
516 self.remove_frames_recursively(child);
517 }
518 if let Some(parent_id) = frame.parent_frame.take() {
519 if let Some(parent) = self.frames.get_mut(&parent_id) {
520 parent.child_frames.remove(&frame.id);
521 }
522 }
523 Some(frame)
524 } else {
525 None
526 }
527 }
528
529 pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
530 if self.isolated_worlds.contains(world_name) {
531 return None;
532 }
533 self.isolated_worlds.insert(world_name.to_string());
534 let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
535 .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
536 .world_name(world_name)
537 .build()
538 .unwrap();
539
540 let mut cmds = Vec::with_capacity(self.frames.len() + 1);
541
542 cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
543
544 cmds.extend(self.frames.keys().map(|id| {
545 let cmd = CreateIsolatedWorldParams::builder()
546 .frame_id(id.clone())
547 .grant_univeral_access(true)
548 .world_name(world_name)
549 .build()
550 .unwrap();
551 (cmd.identifier(), serde_json::to_value(cmd).unwrap())
552 }));
553 Some(CommandChain::new(cmds, self.request_timeout))
554 }
555}
556
557#[derive(Debug)]
558pub enum FrameEvent {
559 NavigationResult(Result<NavigationOk, NavigationError>),
561 NavigationRequest(NavigationId, Request),
563 }
566
567#[derive(Debug)]
568pub enum NavigationError {
569 Timeout {
570 id: NavigationId,
571 err: DeadlineExceeded,
572 },
573 FrameNotFound {
574 id: NavigationId,
575 frame: FrameId,
576 },
577}
578
579impl NavigationError {
580 pub fn navigation_id(&self) -> &NavigationId {
581 match self {
582 NavigationError::Timeout { id, .. } => id,
583 NavigationError::FrameNotFound { id, .. } => id,
584 }
585 }
586}
587
588#[derive(Debug, Clone, Eq, PartialEq)]
589pub enum NavigationOk {
590 SameDocumentNavigation(NavigationId),
591 NewDocumentNavigation(NavigationId),
592}
593
594impl NavigationOk {
595 pub fn navigation_id(&self) -> &NavigationId {
596 match self {
597 NavigationOk::SameDocumentNavigation(id) => id,
598 NavigationOk::NewDocumentNavigation(id) => id,
599 }
600 }
601}
602
603#[derive(Debug)]
605pub struct NavigationWatcher {
606 id: NavigationId,
607 expected_lifecycle: HashSet<MethodId>,
608 frame_id: FrameId,
609 loader_id: Option<LoaderId>,
610 same_document_navigation: bool,
615}
616
617impl NavigationWatcher {
618 pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
619 Self {
620 id,
621 expected_lifecycle: std::iter::once("load".into()).collect(),
622 loader_id,
623 frame_id: frame,
624 same_document_navigation: false,
625 }
626 }
627
628 pub fn is_lifecycle_complete(&self) -> bool {
630 self.expected_lifecycle.is_empty()
631 }
632
633 fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
634 if self.frame_id == ev.frame_id {
635 self.same_document_navigation = true;
636 }
637 }
638}
639
/// Identifier of a navigation request.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
643
644#[derive(Debug)]
646pub struct FrameNavigationRequest {
647 pub id: NavigationId,
649 pub req: Request,
651 pub timeout: Duration,
653}
654
655impl FrameNavigationRequest {
656 pub fn new(id: NavigationId, req: Request) -> Self {
657 Self {
658 id,
659 req,
660 timeout: Duration::from_millis(REQUEST_TIMEOUT),
661 }
662 }
663
664 pub fn set_frame_id(&mut self, frame_id: FrameId) {
666 if let Some(params) = self.req.params.as_object_mut() {
667 if let Entry::Vacant(entry) = params.entry("frameId") {
668 entry.insert(serde_json::Value::String(frame_id.into()));
669 }
670 }
671 }
672}
673
/// Lifecycle events of a page, convertible to their event name via `AsRef<str>`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// The `load` event; this is the default.
    #[default]
    Load,
    /// The `DOMContentLoaded` event.
    DomcontentLoaded,
    /// The `networkIdle` event.
    NetworkIdle,
    /// The `networkAlmostIdle` event.
    NetworkAlmostIdle,
}

impl AsRef<str> for LifecycleEvent {
    /// Returns the event name for this variant.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}