1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10 AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11 EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12 EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17 browser_protocol::page::{self, FrameId},
18 };
20use chromiumoxide_types::{Method, MethodId, Request};
21use spider_fingerprint::BASE_CHROME_VERSION;
22
23use crate::error::DeadlineExceeded;
24use crate::handler::domworld::DOMWorld;
25use crate::handler::http::HttpRequest;
26use crate::handler::REQUEST_TIMEOUT;
27use crate::{cmd::CommandChain, ArcHttpRequest};
28
29lazy_static::lazy_static! {
30 static ref EVALUATION_SCRIPT_URL: String = format!("____{}___evaluation_script__", random_world_name(&BASE_CHROME_VERSION.to_string()));
32}
33
34pub fn random_world_name(id: &str) -> String {
36 use rand::Rng;
37 let mut rng = rand::rng();
38 let rand_len = rng.random_range(6..=12);
39
40 let id_part: String = id
42 .chars()
43 .filter(|c| c.is_ascii_alphanumeric())
44 .take(5)
45 .map(|c| {
46 let c = c.to_ascii_lowercase();
47 if c.is_ascii_alphabetic() {
48 c
49 } else {
50 (b'a' + (c as u8 - b'0') % 26) as char
52 }
53 })
54 .collect();
55
56 let rand_part: String = (0..rand_len)
58 .filter_map(|_| std::char::from_digit(rng.random_range(0..36), 36))
59 .collect();
60
61 let first = std::char::from_digit(rng.random_range(10..36), 36).unwrap_or('a');
63
64 format!("{first}{id_part}{rand_part}")
65}
66
67#[derive(Debug)]
69pub struct Frame {
70 parent_frame: Option<FrameId>,
72 id: FrameId,
74 main_world: DOMWorld,
76 secondary_world: DOMWorld,
78 loader_id: Option<LoaderId>,
79 url: Option<String>,
81 http_request: ArcHttpRequest,
83 child_frames: HashSet<FrameId>,
85 name: Option<String>,
86 lifecycle_events: HashSet<MethodId>,
88 isolated_world_name: String,
90}
91
92impl Frame {
93 pub fn new(id: FrameId) -> Self {
94 let isolated_world_name = random_world_name(id.inner());
95
96 Self {
97 parent_frame: None,
98 id,
99 main_world: Default::default(),
100 secondary_world: Default::default(),
101 loader_id: None,
102 url: None,
103 http_request: None,
104 child_frames: Default::default(),
105 name: None,
106 lifecycle_events: Default::default(),
107 isolated_world_name,
108 }
109 }
110
111 pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
112 parent.child_frames.insert(id.clone());
113 Self {
114 parent_frame: Some(parent.id.clone()),
115 id,
116 main_world: Default::default(),
117 secondary_world: Default::default(),
118 loader_id: None,
119 url: None,
120 http_request: None,
121 child_frames: Default::default(),
122 name: None,
123 lifecycle_events: Default::default(),
124 isolated_world_name: parent.isolated_world_name.clone(),
125 }
126 }
127
128 pub fn get_isolated_world_name(&self) -> &String {
129 &self.isolated_world_name
130 }
131
132 pub fn parent_id(&self) -> Option<&FrameId> {
133 self.parent_frame.as_ref()
134 }
135
136 pub fn id(&self) -> &FrameId {
137 &self.id
138 }
139
140 pub fn url(&self) -> Option<&str> {
141 self.url.as_deref()
142 }
143
144 pub fn name(&self) -> Option<&str> {
145 self.name.as_deref()
146 }
147
148 pub fn main_world(&self) -> &DOMWorld {
149 &self.main_world
150 }
151
152 pub fn secondary_world(&self) -> &DOMWorld {
153 &self.secondary_world
154 }
155
156 pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
157 &self.lifecycle_events
158 }
159
160 pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
161 self.http_request.as_ref()
162 }
163
164 fn navigated(&mut self, frame: &CdpFrame) {
165 self.name.clone_from(&frame.name);
166 let url = if let Some(ref fragment) = frame.url_fragment {
167 format!("{}{fragment}", frame.url)
168 } else {
169 frame.url.clone()
170 };
171 self.url = Some(url);
172 }
173
174 fn navigated_within_url(&mut self, url: String) {
175 self.url = Some(url)
176 }
177
178 fn on_loading_stopped(&mut self) {
179 self.lifecycle_events.insert("DOMContentLoaded".into());
180 self.lifecycle_events.insert("load".into());
181 }
182
183 fn on_loading_started(&mut self) {
184 self.lifecycle_events.clear();
185 self.http_request.take();
186 }
187
188 pub fn is_loaded(&self) -> bool {
189 self.lifecycle_events.contains("load")
190 }
191
192 pub fn clear_contexts(&mut self) {
193 self.main_world.take_context();
194 self.secondary_world.take_context();
195 }
196
197 pub fn destroy_context(&mut self, ctx_unique_id: &str) {
198 if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
199 self.main_world.take_context();
200 } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
201 self.secondary_world.take_context();
202 }
203 }
204
205 pub fn execution_context(&self) -> Option<ExecutionContextId> {
206 self.main_world.execution_context()
207 }
208
209 pub fn set_request(&mut self, request: HttpRequest) {
210 self.http_request = Some(Arc::new(request))
211 }
212}
213
214#[derive(Debug)]
218pub struct FrameManager {
219 main_frame: Option<FrameId>,
220 frames: HashMap<FrameId, Frame>,
221 context_ids: HashMap<String, FrameId>,
223 isolated_worlds: HashSet<String>,
224 request_timeout: Duration,
227 pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
229 navigation: Option<(NavigationWatcher, Instant)>,
231}
232
233impl FrameManager {
234 pub fn new(request_timeout: Duration) -> Self {
235 FrameManager {
236 main_frame: None,
237 frames: Default::default(),
238 context_ids: Default::default(),
239 isolated_worlds: Default::default(),
240 request_timeout,
241 pending_navigations: Default::default(),
242 navigation: None,
243 }
244 }
245
246 pub fn init_commands(timeout: Duration) -> CommandChain {
248 let enable = page::EnableParams::default();
249 let get_tree = page::GetFrameTreeParams::default();
250 let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
251 let mut commands = Vec::with_capacity(3);
255
256 let enable_id = enable.identifier();
257 let get_tree_id = get_tree.identifier();
258 let set_lifecycle_id = set_lifecycle.identifier();
259 if let Ok(value) = serde_json::to_value(enable) {
263 commands.push((enable_id, value));
264 }
265
266 if let Ok(value) = serde_json::to_value(get_tree) {
267 commands.push((get_tree_id, value));
268 }
269
270 if let Ok(value) = serde_json::to_value(set_lifecycle) {
271 commands.push((set_lifecycle_id, value));
272 }
273
274 CommandChain::new(commands, timeout)
283 }
284
285 pub fn main_frame(&self) -> Option<&Frame> {
286 self.main_frame.as_ref().and_then(|id| self.frames.get(id))
287 }
288
289 pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
290 if let Some(id) = self.main_frame.as_ref() {
291 self.frames.get_mut(id)
292 } else {
293 None
294 }
295 }
296
297 pub fn get_isolated_world_name(&self) -> Option<&String> {
299 self.main_frame
300 .as_ref()
301 .and_then(|id| match self.frames.get(id) {
302 Some(fid) => Some(fid.get_isolated_world_name()),
303 _ => None,
304 })
305 }
306
307 pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
308 self.frames.values()
309 }
310
311 pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
312 self.frames.get(id)
313 }
314
315 fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
316 watcher.expected_lifecycle.iter().all(|ev| {
317 frame.lifecycle_events.contains(ev)
318 || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
319 }) && frame
320 .child_frames
321 .iter()
322 .filter_map(|f| self.frames.get(f))
323 .all(|f| self.check_lifecycle(watcher, f))
324 }
325
326 fn check_lifecycle_complete(
327 &self,
328 watcher: &NavigationWatcher,
329 frame: &Frame,
330 ) -> Option<NavigationOk> {
331 if !self.check_lifecycle(watcher, frame) {
332 return None;
333 }
334 if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
335 return None;
336 }
337 if watcher.same_document_navigation {
338 return Some(NavigationOk::SameDocumentNavigation(watcher.id));
339 }
340 if frame.loader_id != watcher.loader_id {
341 return Some(NavigationOk::NewDocumentNavigation(watcher.id));
342 }
343 None
344 }
345
346 pub fn on_http_request_finished(&mut self, request: HttpRequest) {
348 if let Some(id) = request.frame.as_ref() {
349 if let Some(frame) = self.frames.get_mut(id) {
350 frame.set_request(request);
351 }
352 }
353 }
354
355 pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
356 if let Some((watcher, deadline)) = self.navigation.take() {
358 if now > deadline {
359 return Some(FrameEvent::NavigationResult(Err(
361 NavigationError::Timeout {
362 err: DeadlineExceeded::new(now, deadline),
363 id: watcher.id,
364 },
365 )));
366 }
367
368 if let Some(frame) = self.frames.get(&watcher.frame_id) {
369 if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
370 return Some(FrameEvent::NavigationResult(Ok(nav)));
373 } else {
374 self.navigation = Some((watcher, deadline));
376 }
377 } else {
378 return Some(FrameEvent::NavigationResult(Err(
379 NavigationError::FrameNotFound {
380 frame: watcher.frame_id,
381 id: watcher.id,
382 },
383 )));
384 }
385 } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
386 let deadline = Instant::now() + req.timeout;
388 self.navigation = Some((watcher, deadline));
389 return Some(FrameEvent::NavigationRequest(req.id, req.req));
390 }
391 None
392 }
393
394 pub fn goto(&mut self, req: FrameRequestedNavigation) {
396 if let Some(frame_id) = &self.main_frame {
397 self.navigate_frame(frame_id.clone(), req);
398 }
399 }
400
401 pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
403 let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
404 let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
405
406 req.set_frame_id(frame_id);
408
409 self.pending_navigations.push_back((req, watcher))
410 }
411
412 pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
414 }
416
417 pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
418 self.on_frame_attached(
419 frame_tree.frame.id.clone(),
420 frame_tree.frame.parent_id.clone().map(Into::into),
421 );
422 self.on_frame_navigated(&frame_tree.frame);
423 if let Some(children) = frame_tree.child_frames {
424 for child_tree in children {
425 self.on_frame_tree(child_tree);
426 }
427 }
428 }
429
430 pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
431 if self.frames.contains_key(&frame_id) {
432 return;
433 }
434 if let Some(parent_frame_id) = parent_frame_id {
435 if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
436 let frame = Frame::with_parent(frame_id.clone(), parent_frame);
437 self.frames.insert(frame_id, frame);
438 }
439 }
440 }
441
442 pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
443 self.remove_frames_recursively(&event.frame_id);
444 }
445
446 pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
447 if frame.parent_id.is_some() {
448 if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
449 for child in f.child_frames.drain() {
450 self.remove_frames_recursively(&child);
451 }
452 f.navigated(frame);
453 self.frames.insert(id, f);
454 }
455 } else {
456 let mut f = if let Some(main) = self.main_frame.take() {
457 if let Some(mut main_frame) = self.frames.remove(&main) {
459 for child in &main_frame.child_frames {
460 self.remove_frames_recursively(child);
461 }
462 main_frame.child_frames.clear();
464 main_frame.id = frame.id.clone();
465 main_frame
466 } else {
467 Frame::new(frame.id.clone())
468 }
469 } else {
470 Frame::new(frame.id.clone())
472 };
473 f.navigated(frame);
474 self.main_frame = Some(f.id.clone());
475 self.frames.insert(f.id.clone(), f);
476 }
477 }
478
479 pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
480 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
481 frame.navigated_within_url(event.url.clone());
482 }
483 if let Some((watcher, _)) = self.navigation.as_mut() {
484 watcher.on_frame_navigated_within_document(event);
485 }
486 }
487
488 pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
489 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
490 frame.on_loading_stopped();
491 }
492 }
493
494 pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
496 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
497 frame.on_loading_started();
498 }
499 }
500
501 pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
503
504 pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
506 if let Some(frame_id) = event
507 .context
508 .aux_data
509 .as_ref()
510 .and_then(|v| v["frameId"].as_str())
511 {
512 if let Some(frame) = self.frames.get_mut(frame_id) {
513 if event
514 .context
515 .aux_data
516 .as_ref()
517 .and_then(|v| v["isDefault"].as_bool())
518 .unwrap_or_default()
519 {
520 frame
521 .main_world
522 .set_context(event.context.id, event.context.unique_id.clone());
523 } else if event.context.name == frame.isolated_world_name
524 && frame.secondary_world.execution_context().is_none()
525 {
526 frame
527 .secondary_world
528 .set_context(event.context.id, event.context.unique_id.clone());
529 }
530 self.context_ids
531 .insert(event.context.unique_id.clone(), frame.id.clone());
532 }
533 }
534 if event
535 .context
536 .aux_data
537 .as_ref()
538 .filter(|v| v["type"].as_str() == Some("isolated"))
539 .is_some()
540 {
541 self.isolated_worlds.insert(event.context.name.clone());
542 }
543 }
544
545 pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
547 if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
548 if let Some(frame) = self.frames.get_mut(&id) {
549 frame.destroy_context(&event.execution_context_unique_id);
550 }
551 }
552 }
553
554 pub fn on_execution_contexts_cleared(&mut self) {
556 for id in self.context_ids.values() {
557 if let Some(frame) = self.frames.get_mut(id) {
558 frame.clear_contexts();
559 }
560 }
561 self.context_ids.clear()
562 }
563
564 pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
566 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
567 if event.name == "init" {
568 frame.loader_id = Some(event.loader_id.clone());
569 frame.lifecycle_events.clear();
570 }
571 frame.lifecycle_events.insert(event.name.clone().into());
572 }
573 }
574
575 fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
577 if let Some(mut frame) = self.frames.remove(id) {
578 for child in &frame.child_frames {
579 self.remove_frames_recursively(child);
580 }
581 if let Some(parent_id) = frame.parent_frame.take() {
582 if let Some(parent) = self.frames.get_mut(&parent_id) {
583 parent.child_frames.remove(&frame.id);
584 }
585 }
586 Some(frame)
587 } else {
588 None
589 }
590 }
591
592 pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
593 if self.isolated_worlds.contains(world_name) {
594 return None;
595 }
596
597 self.isolated_worlds.insert(world_name.to_string());
598
599 if let Ok(cmd) = AddScriptToEvaluateOnNewDocumentParams::builder()
600 .source(format!("//# sourceURL={}", *EVALUATION_SCRIPT_URL))
601 .world_name(world_name)
602 .build()
603 {
604 let mut cmds = Vec::with_capacity(self.frames.len() + 1);
605 let identifier = cmd.identifier();
606
607 if let Ok(cmd) = serde_json::to_value(cmd) {
608 cmds.push((identifier, cmd));
609 }
610
611 let cm = self.frames.keys().filter_map(|id| {
612 if let Ok(cmd) = CreateIsolatedWorldParams::builder()
613 .frame_id(id.clone())
614 .grant_univeral_access(true)
615 .world_name(world_name)
616 .build()
617 {
618 let cm = (
619 cmd.identifier(),
620 serde_json::to_value(cmd).unwrap_or_default(),
621 );
622
623 Some(cm)
624 } else {
625 None
626 }
627 });
628
629 cmds.extend(cm);
630
631 Some(CommandChain::new(cmds, self.request_timeout))
632 } else {
633 None
634 }
635 }
636}
637
638#[derive(Debug)]
639pub enum FrameEvent {
640 NavigationResult(Result<NavigationOk, NavigationError>),
642 NavigationRequest(NavigationId, Request),
644 }
647
648#[derive(Debug)]
649pub enum NavigationError {
650 Timeout {
651 id: NavigationId,
652 err: DeadlineExceeded,
653 },
654 FrameNotFound {
655 id: NavigationId,
656 frame: FrameId,
657 },
658}
659
660impl NavigationError {
661 pub fn navigation_id(&self) -> &NavigationId {
662 match self {
663 NavigationError::Timeout { id, .. } => id,
664 NavigationError::FrameNotFound { id, .. } => id,
665 }
666 }
667}
668
669#[derive(Debug, Clone, Eq, PartialEq)]
670pub enum NavigationOk {
671 SameDocumentNavigation(NavigationId),
672 NewDocumentNavigation(NavigationId),
673}
674
675impl NavigationOk {
676 pub fn navigation_id(&self) -> &NavigationId {
677 match self {
678 NavigationOk::SameDocumentNavigation(id) => id,
679 NavigationOk::NewDocumentNavigation(id) => id,
680 }
681 }
682}
683
684#[derive(Debug)]
686pub struct NavigationWatcher {
687 id: NavigationId,
688 expected_lifecycle: HashSet<MethodId>,
689 frame_id: FrameId,
690 loader_id: Option<LoaderId>,
691 same_document_navigation: bool,
696}
697
698impl NavigationWatcher {
699 pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
700 Self {
701 id,
702 expected_lifecycle: std::iter::once("load".into()).collect(),
703 loader_id,
704 frame_id: frame,
705 same_document_navigation: false,
706 }
707 }
708
709 pub fn is_lifecycle_complete(&self) -> bool {
711 self.expected_lifecycle.is_empty()
712 }
713
714 fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
715 if self.frame_id == ev.frame_id {
716 self.same_document_navigation = true;
717 }
718 }
719}
720
/// Identifier of a requested navigation.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(
    /// The raw numeric id.
    pub usize,
);
724
725#[derive(Debug)]
727pub struct FrameRequestedNavigation {
728 pub id: NavigationId,
730 pub req: Request,
732 pub timeout: Duration,
734}
735
736impl FrameRequestedNavigation {
737 pub fn new(id: NavigationId, req: Request) -> Self {
738 Self {
739 id,
740 req,
741 timeout: Duration::from_millis(REQUEST_TIMEOUT),
742 }
743 }
744
745 pub fn set_frame_id(&mut self, frame_id: FrameId) {
747 if let Some(params) = self.req.params.as_object_mut() {
748 if let Entry::Vacant(entry) = params.entry("frameId") {
749 entry.insert(serde_json::Value::String(frame_id.into()));
750 }
751 }
752 }
753}
754
/// Lifecycle events that can be referred to by name.
///
/// The `AsRef<str>` implementation yields the event name of each variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// The `load` event; this is the default variant.
    #[default]
    Load,
    /// The `DOMContentLoaded` event.
    DomcontentLoaded,
    /// The `networkIdle` event.
    NetworkIdle,
    /// The `networkAlmostIdle` event.
    NetworkAlmostIdle,
}

impl AsRef<str> for LifecycleEvent {
    /// Returns the event name of this variant.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}