1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10 AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11 EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12 EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17 browser_protocol::page::{self, FrameId},
18 js_protocol::runtime,
19};
20use chromiumoxide_types::{Method, MethodId, Request};
21
22use crate::error::DeadlineExceeded;
23use crate::handler::domworld::DOMWorld;
24use crate::handler::http::HttpRequest;
25use crate::handler::REQUEST_TIMEOUT;
26use crate::{cmd::CommandChain, ArcHttpRequest};
27
/// Pseudo URL written after `//# sourceURL=` in the script that
/// `FrameManager::ensure_isolated_world` registers for new documents.
const EVALUATION_SCRIPT_URL: &str = "____chromiumoxide_utility_world___evaluation_script__";
29
30lazy_static::lazy_static! {
31 static ref CHROME_SPOOF_RUNTIME: bool = {
33 std::env::var("CHROME_SPOOF_RUNTIME").unwrap_or_else(|_| "false".to_string()) == "true"
34 };
35}
36
37pub fn random_world_name(id: &str) -> String {
39 use rand::Rng;
40 let mut rng = rand::thread_rng();
41 let rand_len = rng.gen_range(6..=12);
42
43 let id_part: String = id
45 .chars()
46 .filter(|c| c.is_ascii_alphanumeric())
47 .take(5)
48 .map(|c| {
49 let c = c.to_ascii_lowercase();
50 if c.is_ascii_alphabetic() {
51 c
52 } else {
53 (b'a' + (c as u8 - b'0') % 26) as char
55 }
56 })
57 .collect();
58
59 let rand_part: String = (0..rand_len)
61 .filter_map(|_| std::char::from_digit(rng.gen_range(0..36), 36))
62 .collect();
63
64 let first = std::char::from_digit(rng.gen_range(10..36), 36).unwrap_or('a');
66
67 format!("{first}{id_part}{rand_part}")
68}
69
/// State tracked for a single frame of a page.
#[derive(Debug)]
pub struct Frame {
    /// Id of the parent frame; `None` for frames created with `Frame::new`.
    parent_frame: Option<FrameId>,
    /// Id of this frame.
    id: FrameId,
    /// World that receives execution contexts flagged `isDefault` in their aux data.
    main_world: DOMWorld,
    /// World that receives the execution context whose name equals `isolated_world_name`.
    secondary_world: DOMWorld,
    /// Loader id recorded from the `init` lifecycle event.
    loader_id: Option<LoaderId>,
    /// Current URL (including the fragment, when one was reported).
    url: Option<String>,
    /// Request stored via `set_request`; dropped when loading starts again.
    http_request: ArcHttpRequest,
    /// Ids of the frames that were created with this frame as parent.
    child_frames: HashSet<FrameId>,
    /// Name copied from the CDP frame on navigation.
    name: Option<String>,
    /// Names of the lifecycle events recorded since they were last cleared.
    lifecycle_events: HashSet<MethodId>,
    /// Name of the isolated world associated with this frame.
    isolated_world_name: String,
}
90
91impl Frame {
92 pub fn new(id: FrameId) -> Self {
93 let isolated_world_name = random_world_name(id.inner());
94
95 Self {
96 parent_frame: None,
97 id,
98 main_world: Default::default(),
99 secondary_world: Default::default(),
100 loader_id: None,
101 url: None,
102 http_request: None,
103 child_frames: Default::default(),
104 name: None,
105 lifecycle_events: Default::default(),
106 isolated_world_name,
107 }
108 }
109
110 pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
111 parent.child_frames.insert(id.clone());
112 Self {
113 parent_frame: Some(parent.id.clone()),
114 id,
115 main_world: Default::default(),
116 secondary_world: Default::default(),
117 loader_id: None,
118 url: None,
119 http_request: None,
120 child_frames: Default::default(),
121 name: None,
122 lifecycle_events: Default::default(),
123 isolated_world_name: parent.isolated_world_name.clone(),
124 }
125 }
126
127 pub fn get_isolated_world_name(&self) -> &String {
128 &self.isolated_world_name
129 }
130
131 pub fn parent_id(&self) -> Option<&FrameId> {
132 self.parent_frame.as_ref()
133 }
134
135 pub fn id(&self) -> &FrameId {
136 &self.id
137 }
138
139 pub fn url(&self) -> Option<&str> {
140 self.url.as_deref()
141 }
142
143 pub fn name(&self) -> Option<&str> {
144 self.name.as_deref()
145 }
146
147 pub fn main_world(&self) -> &DOMWorld {
148 &self.main_world
149 }
150
151 pub fn secondary_world(&self) -> &DOMWorld {
152 &self.secondary_world
153 }
154
155 pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
156 &self.lifecycle_events
157 }
158
159 pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
160 self.http_request.as_ref()
161 }
162
163 fn navigated(&mut self, frame: &CdpFrame) {
164 self.name.clone_from(&frame.name);
165 let url = if let Some(ref fragment) = frame.url_fragment {
166 format!("{}{fragment}", frame.url)
167 } else {
168 frame.url.clone()
169 };
170 self.url = Some(url);
171 }
172
173 fn navigated_within_url(&mut self, url: String) {
174 self.url = Some(url)
175 }
176
177 fn on_loading_stopped(&mut self) {
178 self.lifecycle_events.insert("DOMContentLoaded".into());
179 self.lifecycle_events.insert("load".into());
180 }
181
182 fn on_loading_started(&mut self) {
183 self.lifecycle_events.clear();
184 self.http_request.take();
185 }
186
187 pub fn is_loaded(&self) -> bool {
188 self.lifecycle_events.contains("load")
189 }
190
191 pub fn clear_contexts(&mut self) {
192 self.main_world.take_context();
193 self.secondary_world.take_context();
194 }
195
196 pub fn destroy_context(&mut self, ctx_unique_id: &str) {
197 if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
198 self.main_world.take_context();
199 } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
200 self.secondary_world.take_context();
201 }
202 }
203
204 pub fn execution_context(&self) -> Option<ExecutionContextId> {
205 self.main_world.execution_context()
206 }
207
208 pub fn set_request(&mut self, request: HttpRequest) {
209 self.http_request = Some(Arc::new(request))
210 }
211}
212
/// Tracks the frames of a page, their execution contexts, and the navigations
/// requested for them.
#[derive(Debug)]
pub struct FrameManager {
    /// Id of the current main frame, once one has been navigated.
    main_frame: Option<FrameId>,
    /// All known frames, keyed by their id.
    frames: HashMap<FrameId, Frame>,
    /// Maps the unique id of an execution context to the frame it was created in.
    context_ids: HashMap<String, FrameId>,
    /// Names of the isolated worlds that were seen or requested.
    isolated_worlds: HashSet<String>,
    /// Timeout handed to the command chain built by `ensure_isolated_world`.
    request_timeout: Duration,
    /// Navigations queued by `navigate_frame`, waiting to be started by `poll`.
    pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
    /// The navigation currently in flight together with its deadline.
    navigation: Option<(NavigationWatcher, Instant)>,
}
231
232impl FrameManager {
233 pub fn new(request_timeout: Duration) -> Self {
234 FrameManager {
235 main_frame: None,
236 frames: Default::default(),
237 context_ids: Default::default(),
238 isolated_worlds: Default::default(),
239 request_timeout,
240 pending_navigations: Default::default(),
241 navigation: None,
242 }
243 }
244
245 pub fn init_commands(timeout: Duration) -> CommandChain {
247 let enable = page::EnableParams::default();
248 let get_tree = page::GetFrameTreeParams::default();
249 let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
250 let enable_runtime = runtime::EnableParams::default();
251 let disable_runtime = runtime::DisableParams::default();
252
253 let mut commands = Vec::with_capacity(if *CHROME_SPOOF_RUNTIME { 5 } else { 4 });
254
255 let enable_id = enable.identifier();
256 let get_tree_id = get_tree.identifier();
257 let set_lifecycle_id = set_lifecycle.identifier();
258 let enable_runtime_id = enable_runtime.identifier();
259 let disable_runtime_id = disable_runtime.identifier();
260
261 if let Ok(value) = serde_json::to_value(enable) {
262 commands.push((enable_id, value));
263 }
264
265 if let Ok(value) = serde_json::to_value(get_tree) {
266 commands.push((get_tree_id, value));
267 }
268
269 if let Ok(value) = serde_json::to_value(set_lifecycle) {
270 commands.push((set_lifecycle_id, value));
271 }
272
273 if let Ok(value) = serde_json::to_value(enable_runtime) {
274 commands.push((enable_runtime_id, value));
275 }
276
277 if *CHROME_SPOOF_RUNTIME {
278 if let Ok(value) = serde_json::to_value(disable_runtime) {
279 commands.push((disable_runtime_id, value));
280 }
281 }
282
283 CommandChain::new(commands, timeout)
284 }
285
286 pub fn main_frame(&self) -> Option<&Frame> {
287 self.main_frame.as_ref().and_then(|id| self.frames.get(id))
288 }
289
290 pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
291 if let Some(id) = self.main_frame.as_ref() {
292 self.frames.get_mut(id)
293 } else {
294 None
295 }
296 }
297
298 pub fn get_isolated_world_name(&self) -> Option<&String> {
300 self.main_frame
301 .as_ref()
302 .and_then(|id| match self.frames.get(id) {
303 Some(fid) => Some(fid.get_isolated_world_name()),
304 _ => None,
305 })
306 }
307
/// Iterates over every tracked frame, in the map's (unspecified) order.
pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
    self.frames.values()
}
311
/// Looks up the tracked frame with the given id.
pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
    self.frames.get(id)
}
315
316 fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
317 watcher.expected_lifecycle.iter().all(|ev| {
318 frame.lifecycle_events.contains(ev)
319 || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
320 }) && frame
321 .child_frames
322 .iter()
323 .filter_map(|f| self.frames.get(f))
324 .all(|f| self.check_lifecycle(watcher, f))
325 }
326
327 fn check_lifecycle_complete(
328 &self,
329 watcher: &NavigationWatcher,
330 frame: &Frame,
331 ) -> Option<NavigationOk> {
332 if !self.check_lifecycle(watcher, frame) {
333 return None;
334 }
335 if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
336 return None;
337 }
338 if watcher.same_document_navigation {
339 return Some(NavigationOk::SameDocumentNavigation(watcher.id));
340 }
341 if frame.loader_id != watcher.loader_id {
342 return Some(NavigationOk::NewDocumentNavigation(watcher.id));
343 }
344 None
345 }
346
347 pub fn on_http_request_finished(&mut self, request: HttpRequest) {
349 if let Some(id) = request.frame.as_ref() {
350 if let Some(frame) = self.frames.get_mut(id) {
351 frame.set_request(request);
352 }
353 }
354 }
355
356 pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
357 if let Some((watcher, deadline)) = self.navigation.take() {
359 if now > deadline {
360 return Some(FrameEvent::NavigationResult(Err(
362 NavigationError::Timeout {
363 err: DeadlineExceeded::new(now, deadline),
364 id: watcher.id,
365 },
366 )));
367 }
368
369 if let Some(frame) = self.frames.get(&watcher.frame_id) {
370 if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
371 return Some(FrameEvent::NavigationResult(Ok(nav)));
374 } else {
375 self.navigation = Some((watcher, deadline));
377 }
378 } else {
379 return Some(FrameEvent::NavigationResult(Err(
380 NavigationError::FrameNotFound {
381 frame: watcher.frame_id,
382 id: watcher.id,
383 },
384 )));
385 }
386 } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
387 let deadline = Instant::now() + req.timeout;
389 self.navigation = Some((watcher, deadline));
390 return Some(FrameEvent::NavigationRequest(req.id, req.req));
391 }
392 None
393 }
394
395 pub fn goto(&mut self, req: FrameRequestedNavigation) {
397 if let Some(frame_id) = &self.main_frame {
398 self.navigate_frame(frame_id.clone(), req);
399 }
400 }
401
402 pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
404 let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
405 let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
406
407 req.set_frame_id(frame_id);
409
410 self.pending_navigations.push_back((req, watcher))
411 }
412
/// Hook for `Target.attachedToTarget`; currently does nothing with the event.
pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
}
417
418 pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
419 self.on_frame_attached(
420 frame_tree.frame.id.clone(),
421 frame_tree.frame.parent_id.clone().map(Into::into),
422 );
423 self.on_frame_navigated(&frame_tree.frame);
424 if let Some(children) = frame_tree.child_frames {
425 for child_tree in children {
426 self.on_frame_tree(child_tree);
427 }
428 }
429 }
430
431 pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
432 if self.frames.contains_key(&frame_id) {
433 return;
434 }
435 if let Some(parent_frame_id) = parent_frame_id {
436 if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
437 let frame = Frame::with_parent(frame_id.clone(), parent_frame);
438 self.frames.insert(frame_id, frame);
439 }
440 }
441 }
442
/// Stops tracking the detached frame and all of its descendants.
pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
    self.remove_frames_recursively(&event.frame_id);
}
446
447 pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
448 if frame.parent_id.is_some() {
449 if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
450 for child in f.child_frames.drain() {
451 self.remove_frames_recursively(&child);
452 }
453 f.navigated(frame);
454 self.frames.insert(id, f);
455 }
456 } else {
457 let mut f = if let Some(main) = self.main_frame.take() {
458 if let Some(mut main_frame) = self.frames.remove(&main) {
460 for child in &main_frame.child_frames {
461 self.remove_frames_recursively(child);
462 }
463 main_frame.child_frames.clear();
465 main_frame.id = frame.id.clone();
466 main_frame
467 } else {
468 Frame::new(frame.id.clone())
469 }
470 } else {
471 Frame::new(frame.id.clone())
473 };
474 f.navigated(frame);
475 self.main_frame = Some(f.id.clone());
476 self.frames.insert(f.id.clone(), f);
477 }
478 }
479
480 pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
481 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
482 frame.navigated_within_url(event.url.clone());
483 }
484 if let Some((watcher, _)) = self.navigation.as_mut() {
485 watcher.on_frame_navigated_within_document(event);
486 }
487 }
488
489 pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
490 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
491 frame.on_loading_stopped();
492 }
493 }
494
495 pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
497 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
498 frame.on_loading_started();
499 }
500 }
501
/// Hook for `Runtime.bindingCalled`; currently does nothing with the event.
pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
504
505 pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
507 if let Some(frame_id) = event
508 .context
509 .aux_data
510 .as_ref()
511 .and_then(|v| v["frameId"].as_str())
512 {
513 if let Some(frame) = self.frames.get_mut(frame_id) {
514 if event
515 .context
516 .aux_data
517 .as_ref()
518 .and_then(|v| v["isDefault"].as_bool())
519 .unwrap_or_default()
520 {
521 frame
522 .main_world
523 .set_context(event.context.id, event.context.unique_id.clone());
524 } else if event.context.name == frame.isolated_world_name
525 && frame.secondary_world.execution_context().is_none()
526 {
527 frame
528 .secondary_world
529 .set_context(event.context.id, event.context.unique_id.clone());
530 }
531 self.context_ids
532 .insert(event.context.unique_id.clone(), frame.id.clone());
533 }
534 }
535 if event
536 .context
537 .aux_data
538 .as_ref()
539 .filter(|v| v["type"].as_str() == Some("isolated"))
540 .is_some()
541 {
542 self.isolated_worlds.insert(event.context.name.clone());
543 }
544 }
545
546 pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
548 if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
549 if let Some(frame) = self.frames.get_mut(&id) {
550 frame.destroy_context(&event.execution_context_unique_id);
551 }
552 }
553 }
554
555 pub fn on_execution_contexts_cleared(&mut self) {
557 for id in self.context_ids.values() {
558 if let Some(frame) = self.frames.get_mut(id) {
559 frame.clear_contexts();
560 }
561 }
562 self.context_ids.clear()
563 }
564
565 pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
567 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
568 if event.name == "init" {
569 frame.loader_id = Some(event.loader_id.clone());
570 frame.lifecycle_events.clear();
571 }
572 frame.lifecycle_events.insert(event.name.clone().into());
573 }
574 }
575
576 fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
578 if let Some(mut frame) = self.frames.remove(id) {
579 for child in &frame.child_frames {
580 self.remove_frames_recursively(child);
581 }
582 if let Some(parent_id) = frame.parent_frame.take() {
583 if let Some(parent) = self.frames.get_mut(&parent_id) {
584 parent.child_frames.remove(&frame.id);
585 }
586 }
587 Some(frame)
588 } else {
589 None
590 }
591 }
592
593 pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
594 if self.isolated_worlds.contains(world_name) {
595 return None;
596 }
597
598 self.isolated_worlds.insert(world_name.to_string());
599
600 let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
601 .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
602 .world_name(world_name)
603 .build()
604 .unwrap();
605
606 let mut cmds = Vec::with_capacity(self.frames.len() + 1);
607
608 cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
609
610 let cm = self.frames.keys().filter_map(|id| {
611 if let Ok(cmd) = CreateIsolatedWorldParams::builder()
612 .frame_id(id.clone())
613 .grant_univeral_access(true)
614 .world_name(world_name)
615 .build()
616 {
617 let cm = (
618 cmd.identifier(),
619 serde_json::to_value(cmd).unwrap_or_default(),
620 );
621
622 Some(cm)
623 } else {
624 None
625 }
626 });
627
628 cmds.extend(cm);
629
630 Some(CommandChain::new(cmds, self.request_timeout))
631 }
632}
633
/// Events produced by `FrameManager::poll`.
#[derive(Debug)]
pub enum FrameEvent {
    /// The in-flight navigation finished, either successfully or with an error.
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A queued navigation was started; carries its id and the request to send.
    NavigationRequest(NavigationId, Request),
}
643
/// Reasons a navigation is reported as failed.
#[derive(Debug)]
pub enum NavigationError {
    /// The navigation was still pending after its deadline had passed.
    Timeout {
        /// Id of the navigation that timed out.
        id: NavigationId,
        /// Details about the exceeded deadline.
        err: DeadlineExceeded,
    },
    /// The frame being navigated is not tracked by the manager.
    FrameNotFound {
        /// Id of the affected navigation.
        id: NavigationId,
        /// Id of the frame that could not be found.
        frame: FrameId,
    },
}
655
656impl NavigationError {
657 pub fn navigation_id(&self) -> &NavigationId {
658 match self {
659 NavigationError::Timeout { id, .. } => id,
660 NavigationError::FrameNotFound { id, .. } => id,
661 }
662 }
663}
664
/// Ways a navigation can complete successfully.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum NavigationOk {
    /// A navigation within the current document was observed for the watched frame.
    SameDocumentNavigation(NavigationId),
    /// The frame's loader id differs from the one captured when the navigation was queued.
    NewDocumentNavigation(NavigationId),
}
670
671impl NavigationOk {
672 pub fn navigation_id(&self) -> &NavigationId {
673 match self {
674 NavigationOk::SameDocumentNavigation(id) => id,
675 NavigationOk::NewDocumentNavigation(id) => id,
676 }
677 }
678}
679
/// Tracks the progress of one requested navigation.
#[derive(Debug)]
pub struct NavigationWatcher {
    /// Id of the navigation being watched.
    id: NavigationId,
    /// Lifecycle events the frame (and its child frames) must have seen.
    expected_lifecycle: HashSet<MethodId>,
    /// Frame that is being navigated.
    frame_id: FrameId,
    /// Loader id the frame had when the navigation was queued, if any.
    loader_id: Option<LoaderId>,
    /// Set once a same-document navigation was reported for `frame_id`.
    same_document_navigation: bool,
}
693
694impl NavigationWatcher {
695 pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
696 Self {
697 id,
698 expected_lifecycle: std::iter::once("load".into()).collect(),
699 loader_id,
700 frame_id: frame,
701 same_document_navigation: false,
702 }
703 }
704
705 pub fn is_lifecycle_complete(&self) -> bool {
707 self.expected_lifecycle.is_empty()
708 }
709
710 fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
711 if self.frame_id == ev.frame_id {
712 self.same_document_navigation = true;
713 }
714 }
715}
716
/// Identifier that ties a navigation request to its eventual result.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
720
/// A navigation request waiting to be processed by the frame manager.
#[derive(Debug)]
pub struct FrameRequestedNavigation {
    /// Id under which the outcome of the navigation is reported.
    pub id: NavigationId,
    /// The request to send in order to start the navigation.
    pub req: Request,
    /// How long the navigation may take, counted from when it is started.
    pub timeout: Duration,
}
731
732impl FrameRequestedNavigation {
733 pub fn new(id: NavigationId, req: Request) -> Self {
734 Self {
735 id,
736 req,
737 timeout: Duration::from_millis(REQUEST_TIMEOUT),
738 }
739 }
740
741 pub fn set_frame_id(&mut self, frame_id: FrameId) {
743 if let Some(params) = self.req.params.as_object_mut() {
744 if let Entry::Vacant(entry) = params.entry("frameId") {
745 entry.insert(serde_json::Value::String(frame_id.into()));
746 }
747 }
748 }
749}
750
/// Page lifecycle events that can be referred to by name.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// The `load` event; this is the default.
    #[default]
    Load,
    /// The `DOMContentLoaded` event.
    DomcontentLoaded,
    /// The `networkIdle` event.
    NetworkIdle,
    /// The `networkAlmostIdle` event.
    NetworkAlmostIdle,
}
759
760impl AsRef<str> for LifecycleEvent {
761 fn as_ref(&self) -> &str {
762 match self {
763 LifecycleEvent::Load => "load",
764 LifecycleEvent::DomcontentLoaded => "DOMContentLoaded",
765 LifecycleEvent::NetworkIdle => "networkIdle",
766 LifecycleEvent::NetworkAlmostIdle => "networkAlmostIdle",
767 }
768 }
769}