1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10 AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11 EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12 EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17 browser_protocol::page::{self, FrameId},
18 };
20use chromiumoxide_types::{Method, MethodId, Request};
21use spider_fingerprint::BASE_CHROME_VERSION;
22
23use crate::error::DeadlineExceeded;
24use crate::handler::domworld::DOMWorld;
25use crate::handler::http::HttpRequest;
26use crate::handler::REQUEST_TIMEOUT;
27use crate::{cmd::CommandChain, ArcHttpRequest};
28
29lazy_static::lazy_static! {
30 static ref EVALUATION_SCRIPT_URL: String = format!("____{}___evaluation_script__", random_world_name(&BASE_CHROME_VERSION.to_string()));
32}
33
34pub fn random_world_name(id: &str) -> String {
36 use rand::Rng;
37 let mut rng = rand::rng();
38 let rand_len = rng.random_range(6..=12);
39
40 let id_part: String = id
42 .chars()
43 .filter(|c| c.is_ascii_alphanumeric())
44 .take(5)
45 .map(|c| {
46 let c = c.to_ascii_lowercase();
47 if c.is_ascii_alphabetic() {
48 c
49 } else {
50 (b'a' + (c as u8 - b'0') % 26) as char
52 }
53 })
54 .collect();
55
56 let rand_part: String = (0..rand_len)
58 .filter_map(|_| std::char::from_digit(rng.random_range(0..36), 36))
59 .collect();
60
61 let first = std::char::from_digit(rng.random_range(10..36), 36).unwrap_or('a');
63
64 format!("{first}{id_part}{rand_part}")
65}
66
67#[derive(Debug)]
69pub struct Frame {
70 parent_frame: Option<FrameId>,
72 id: FrameId,
74 main_world: DOMWorld,
75 secondary_world: DOMWorld,
76 loader_id: Option<LoaderId>,
77 url: Option<String>,
79 http_request: ArcHttpRequest,
81 child_frames: HashSet<FrameId>,
83 name: Option<String>,
84 lifecycle_events: HashSet<MethodId>,
86 isolated_world_name: String,
88}
89
90impl Frame {
91 pub fn new(id: FrameId) -> Self {
92 let isolated_world_name = random_world_name(id.inner());
93
94 Self {
95 parent_frame: None,
96 id,
97 main_world: Default::default(),
98 secondary_world: Default::default(),
99 loader_id: None,
100 url: None,
101 http_request: None,
102 child_frames: Default::default(),
103 name: None,
104 lifecycle_events: Default::default(),
105 isolated_world_name,
106 }
107 }
108
109 pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
110 parent.child_frames.insert(id.clone());
111 Self {
112 parent_frame: Some(parent.id.clone()),
113 id,
114 main_world: Default::default(),
115 secondary_world: Default::default(),
116 loader_id: None,
117 url: None,
118 http_request: None,
119 child_frames: Default::default(),
120 name: None,
121 lifecycle_events: Default::default(),
122 isolated_world_name: parent.isolated_world_name.clone(),
123 }
124 }
125
126 pub fn get_isolated_world_name(&self) -> &String {
127 &self.isolated_world_name
128 }
129
130 pub fn parent_id(&self) -> Option<&FrameId> {
131 self.parent_frame.as_ref()
132 }
133
134 pub fn id(&self) -> &FrameId {
135 &self.id
136 }
137
138 pub fn url(&self) -> Option<&str> {
139 self.url.as_deref()
140 }
141
142 pub fn name(&self) -> Option<&str> {
143 self.name.as_deref()
144 }
145
146 pub fn main_world(&self) -> &DOMWorld {
147 &self.main_world
148 }
149
150 pub fn secondary_world(&self) -> &DOMWorld {
151 &self.secondary_world
152 }
153
154 pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
155 &self.lifecycle_events
156 }
157
158 pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
159 self.http_request.as_ref()
160 }
161
162 fn navigated(&mut self, frame: &CdpFrame) {
163 self.name.clone_from(&frame.name);
164 let url = if let Some(ref fragment) = frame.url_fragment {
165 format!("{}{fragment}", frame.url)
166 } else {
167 frame.url.clone()
168 };
169 self.url = Some(url);
170 }
171
172 fn navigated_within_url(&mut self, url: String) {
173 self.url = Some(url)
174 }
175
176 fn on_loading_stopped(&mut self) {
177 self.lifecycle_events.insert("DOMContentLoaded".into());
178 self.lifecycle_events.insert("load".into());
179 }
180
181 fn on_loading_started(&mut self) {
182 self.lifecycle_events.clear();
183 self.http_request.take();
184 }
185
186 pub fn is_loaded(&self) -> bool {
187 self.lifecycle_events.contains("load")
188 }
189
190 pub fn clear_contexts(&mut self) {
191 self.main_world.take_context();
192 self.secondary_world.take_context();
193 }
194
195 pub fn destroy_context(&mut self, ctx_unique_id: &str) {
196 if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
197 self.main_world.take_context();
198 } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
199 self.secondary_world.take_context();
200 }
201 }
202
203 pub fn execution_context(&self) -> Option<ExecutionContextId> {
204 self.main_world.execution_context()
205 }
206
207 pub fn set_request(&mut self, request: HttpRequest) {
208 self.http_request = Some(Arc::new(request))
209 }
210}
211
212#[derive(Debug)]
216pub struct FrameManager {
217 main_frame: Option<FrameId>,
218 frames: HashMap<FrameId, Frame>,
219 context_ids: HashMap<String, FrameId>,
221 isolated_worlds: HashSet<String>,
222 request_timeout: Duration,
225 pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
227 navigation: Option<(NavigationWatcher, Instant)>,
229}
230
231impl FrameManager {
232 pub fn new(request_timeout: Duration) -> Self {
233 FrameManager {
234 main_frame: None,
235 frames: Default::default(),
236 context_ids: Default::default(),
237 isolated_worlds: Default::default(),
238 request_timeout,
239 pending_navigations: Default::default(),
240 navigation: None,
241 }
242 }
243
244 pub fn init_commands(timeout: Duration) -> CommandChain {
246 let enable = page::EnableParams::default();
247 let get_tree = page::GetFrameTreeParams::default();
248 let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
249 let mut commands = Vec::with_capacity(3);
253
254 let enable_id = enable.identifier();
255 let get_tree_id = get_tree.identifier();
256 let set_lifecycle_id = set_lifecycle.identifier();
257 if let Ok(value) = serde_json::to_value(enable) {
261 commands.push((enable_id, value));
262 }
263
264 if let Ok(value) = serde_json::to_value(get_tree) {
265 commands.push((get_tree_id, value));
266 }
267
268 if let Ok(value) = serde_json::to_value(set_lifecycle) {
269 commands.push((set_lifecycle_id, value));
270 }
271
272 CommandChain::new(commands, timeout)
281 }
282
283 pub fn main_frame(&self) -> Option<&Frame> {
284 self.main_frame.as_ref().and_then(|id| self.frames.get(id))
285 }
286
287 pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
288 if let Some(id) = self.main_frame.as_ref() {
289 self.frames.get_mut(id)
290 } else {
291 None
292 }
293 }
294
295 pub fn get_isolated_world_name(&self) -> Option<&String> {
297 self.main_frame
298 .as_ref()
299 .and_then(|id| match self.frames.get(id) {
300 Some(fid) => Some(fid.get_isolated_world_name()),
301 _ => None,
302 })
303 }
304
305 pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
306 self.frames.values()
307 }
308
309 pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
310 self.frames.get(id)
311 }
312
313 fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
314 watcher.expected_lifecycle.iter().all(|ev| {
315 frame.lifecycle_events.contains(ev)
316 || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
317 }) && frame
318 .child_frames
319 .iter()
320 .filter_map(|f| self.frames.get(f))
321 .all(|f| self.check_lifecycle(watcher, f))
322 }
323
324 fn check_lifecycle_complete(
325 &self,
326 watcher: &NavigationWatcher,
327 frame: &Frame,
328 ) -> Option<NavigationOk> {
329 if !self.check_lifecycle(watcher, frame) {
330 return None;
331 }
332 if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
333 return None;
334 }
335 if watcher.same_document_navigation {
336 return Some(NavigationOk::SameDocumentNavigation(watcher.id));
337 }
338 if frame.loader_id != watcher.loader_id {
339 return Some(NavigationOk::NewDocumentNavigation(watcher.id));
340 }
341 None
342 }
343
344 pub fn on_http_request_finished(&mut self, request: HttpRequest) {
346 if let Some(id) = request.frame.as_ref() {
347 if let Some(frame) = self.frames.get_mut(id) {
348 frame.set_request(request);
349 }
350 }
351 }
352
353 pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
354 if let Some((watcher, deadline)) = self.navigation.take() {
356 if now > deadline {
357 return Some(FrameEvent::NavigationResult(Err(
359 NavigationError::Timeout {
360 err: DeadlineExceeded::new(now, deadline),
361 id: watcher.id,
362 },
363 )));
364 }
365
366 if let Some(frame) = self.frames.get(&watcher.frame_id) {
367 if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
368 return Some(FrameEvent::NavigationResult(Ok(nav)));
371 } else {
372 self.navigation = Some((watcher, deadline));
374 }
375 } else {
376 return Some(FrameEvent::NavigationResult(Err(
377 NavigationError::FrameNotFound {
378 frame: watcher.frame_id,
379 id: watcher.id,
380 },
381 )));
382 }
383 } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
384 let deadline = Instant::now() + req.timeout;
386 self.navigation = Some((watcher, deadline));
387 return Some(FrameEvent::NavigationRequest(req.id, req.req));
388 }
389 None
390 }
391
392 pub fn goto(&mut self, req: FrameRequestedNavigation) {
394 if let Some(frame_id) = &self.main_frame {
395 self.navigate_frame(frame_id.clone(), req);
396 }
397 }
398
399 pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
401 let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
402 let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
403
404 req.set_frame_id(frame_id);
406
407 self.pending_navigations.push_back((req, watcher))
408 }
409
410 pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
412 }
414
415 pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
416 self.on_frame_attached(
417 frame_tree.frame.id.clone(),
418 frame_tree.frame.parent_id.clone().map(Into::into),
419 );
420 self.on_frame_navigated(&frame_tree.frame);
421 if let Some(children) = frame_tree.child_frames {
422 for child_tree in children {
423 self.on_frame_tree(child_tree);
424 }
425 }
426 }
427
428 pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
429 if self.frames.contains_key(&frame_id) {
430 return;
431 }
432 if let Some(parent_frame_id) = parent_frame_id {
433 if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
434 let frame = Frame::with_parent(frame_id.clone(), parent_frame);
435 self.frames.insert(frame_id, frame);
436 }
437 }
438 }
439
440 pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
441 self.remove_frames_recursively(&event.frame_id);
442 }
443
444 pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
445 if frame.parent_id.is_some() {
446 if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
447 for child in f.child_frames.drain() {
448 self.remove_frames_recursively(&child);
449 }
450 f.navigated(frame);
451 self.frames.insert(id, f);
452 }
453 } else {
454 let mut f = if let Some(main) = self.main_frame.take() {
455 if let Some(mut main_frame) = self.frames.remove(&main) {
457 for child in &main_frame.child_frames {
458 self.remove_frames_recursively(child);
459 }
460 main_frame.child_frames.clear();
462 main_frame.id = frame.id.clone();
463 main_frame
464 } else {
465 Frame::new(frame.id.clone())
466 }
467 } else {
468 Frame::new(frame.id.clone())
470 };
471 f.navigated(frame);
472 self.main_frame = Some(f.id.clone());
473 self.frames.insert(f.id.clone(), f);
474 }
475 }
476
477 pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
478 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
479 frame.navigated_within_url(event.url.clone());
480 }
481 if let Some((watcher, _)) = self.navigation.as_mut() {
482 watcher.on_frame_navigated_within_document(event);
483 }
484 }
485
486 pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
487 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
488 frame.on_loading_stopped();
489 }
490 }
491
492 pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
494 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
495 frame.on_loading_started();
496 }
497 }
498
499 pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
501
502 pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
504 if let Some(frame_id) = event
505 .context
506 .aux_data
507 .as_ref()
508 .and_then(|v| v["frameId"].as_str())
509 {
510 if let Some(frame) = self.frames.get_mut(frame_id) {
511 if event
512 .context
513 .aux_data
514 .as_ref()
515 .and_then(|v| v["isDefault"].as_bool())
516 .unwrap_or_default()
517 {
518 frame
519 .main_world
520 .set_context(event.context.id, event.context.unique_id.clone());
521 } else if event.context.name == frame.isolated_world_name
522 && frame.secondary_world.execution_context().is_none()
523 {
524 frame
525 .secondary_world
526 .set_context(event.context.id, event.context.unique_id.clone());
527 }
528 self.context_ids
529 .insert(event.context.unique_id.clone(), frame.id.clone());
530 }
531 }
532 if event
533 .context
534 .aux_data
535 .as_ref()
536 .filter(|v| v["type"].as_str() == Some("isolated"))
537 .is_some()
538 {
539 self.isolated_worlds.insert(event.context.name.clone());
540 }
541 }
542
543 pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
545 if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
546 if let Some(frame) = self.frames.get_mut(&id) {
547 frame.destroy_context(&event.execution_context_unique_id);
548 }
549 }
550 }
551
552 pub fn on_execution_contexts_cleared(&mut self) {
554 for id in self.context_ids.values() {
555 if let Some(frame) = self.frames.get_mut(id) {
556 frame.clear_contexts();
557 }
558 }
559 self.context_ids.clear()
560 }
561
562 pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
564 if let Some(frame) = self.frames.get_mut(&event.frame_id) {
565 if event.name == "init" {
566 frame.loader_id = Some(event.loader_id.clone());
567 frame.lifecycle_events.clear();
568 }
569 frame.lifecycle_events.insert(event.name.clone().into());
570 }
571 }
572
573 fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
575 if let Some(mut frame) = self.frames.remove(id) {
576 for child in &frame.child_frames {
577 self.remove_frames_recursively(child);
578 }
579 if let Some(parent_id) = frame.parent_frame.take() {
580 if let Some(parent) = self.frames.get_mut(&parent_id) {
581 parent.child_frames.remove(&frame.id);
582 }
583 }
584 Some(frame)
585 } else {
586 None
587 }
588 }
589
590 pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
591 if self.isolated_worlds.contains(world_name) {
592 return None;
593 }
594
595 self.isolated_worlds.insert(world_name.to_string());
596
597 if let Ok(cmd) = AddScriptToEvaluateOnNewDocumentParams::builder()
598 .source(format!("//# sourceURL={}", *EVALUATION_SCRIPT_URL))
599 .world_name(world_name)
600 .build()
601 {
602 let mut cmds = Vec::with_capacity(self.frames.len() + 1);
603 let identifier = cmd.identifier();
604
605 if let Ok(cmd) = serde_json::to_value(cmd) {
606 cmds.push((identifier, cmd));
607 }
608
609 let cm = self.frames.keys().filter_map(|id| {
610 if let Ok(cmd) = CreateIsolatedWorldParams::builder()
611 .frame_id(id.clone())
612 .grant_univeral_access(true)
613 .world_name(world_name)
614 .build()
615 {
616 let cm = (
617 cmd.identifier(),
618 serde_json::to_value(cmd).unwrap_or_default(),
619 );
620
621 Some(cm)
622 } else {
623 None
624 }
625 });
626
627 cmds.extend(cm);
628
629 Some(CommandChain::new(cmds, self.request_timeout))
630 } else {
631 None
632 }
633 }
634}
635
636#[derive(Debug)]
637pub enum FrameEvent {
638 NavigationResult(Result<NavigationOk, NavigationError>),
640 NavigationRequest(NavigationId, Request),
642 }
645
646#[derive(Debug)]
647pub enum NavigationError {
648 Timeout {
649 id: NavigationId,
650 err: DeadlineExceeded,
651 },
652 FrameNotFound {
653 id: NavigationId,
654 frame: FrameId,
655 },
656}
657
658impl NavigationError {
659 pub fn navigation_id(&self) -> &NavigationId {
660 match self {
661 NavigationError::Timeout { id, .. } => id,
662 NavigationError::FrameNotFound { id, .. } => id,
663 }
664 }
665}
666
667#[derive(Debug, Clone, Eq, PartialEq)]
668pub enum NavigationOk {
669 SameDocumentNavigation(NavigationId),
670 NewDocumentNavigation(NavigationId),
671}
672
673impl NavigationOk {
674 pub fn navigation_id(&self) -> &NavigationId {
675 match self {
676 NavigationOk::SameDocumentNavigation(id) => id,
677 NavigationOk::NewDocumentNavigation(id) => id,
678 }
679 }
680}
681
682#[derive(Debug)]
684pub struct NavigationWatcher {
685 id: NavigationId,
686 expected_lifecycle: HashSet<MethodId>,
687 frame_id: FrameId,
688 loader_id: Option<LoaderId>,
689 same_document_navigation: bool,
694}
695
696impl NavigationWatcher {
697 pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
698 Self {
699 id,
700 expected_lifecycle: std::iter::once("load".into()).collect(),
701 loader_id,
702 frame_id: frame,
703 same_document_navigation: false,
704 }
705 }
706
707 pub fn is_lifecycle_complete(&self) -> bool {
709 self.expected_lifecycle.is_empty()
710 }
711
712 fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
713 if self.frame_id == ev.frame_id {
714 self.same_document_navigation = true;
715 }
716 }
717}
718
/// Identifier of a single navigation.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct NavigationId(pub usize);
722
723#[derive(Debug)]
725pub struct FrameRequestedNavigation {
726 pub id: NavigationId,
728 pub req: Request,
730 pub timeout: Duration,
732}
733
734impl FrameRequestedNavigation {
735 pub fn new(id: NavigationId, req: Request) -> Self {
736 Self {
737 id,
738 req,
739 timeout: Duration::from_millis(REQUEST_TIMEOUT),
740 }
741 }
742
743 pub fn set_frame_id(&mut self, frame_id: FrameId) {
745 if let Some(params) = self.req.params.as_object_mut() {
746 if let Entry::Vacant(entry) = params.entry("frameId") {
747 entry.insert(serde_json::Value::String(frame_id.into()));
748 }
749 }
750 }
751}
752
/// Page lifecycle events that have a protocol name; `Load` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    #[default]
    Load,
    DomcontentLoaded,
    NetworkIdle,
    NetworkAlmostIdle,
}

impl AsRef<str> for LifecycleEvent {
    /// Returns the event name associated with the variant.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}