Skip to main content

chaser_oxide/handler/
frame.rs

1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{self, FrameId};
10use chromiumoxide_cdp::cdp::browser_protocol::page::{
11    AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
12    EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
13    EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
14};
15use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
16use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
17use chromiumoxide_types::{Method, MethodId, Request};
18
19use crate::error::DeadlineExceeded;
20use crate::handler::domworld::DOMWorld;
21use crate::handler::http::HttpRequest;
22use crate::handler::REQUEST_TIMEOUT;
23use crate::{cmd::CommandChain, ArcHttpRequest};
24
/// Name of the utility world. An execution context reported under this name
/// is stored as a frame's secondary world by `FrameManager`.
pub const UTILITY_WORLD_NAME: &str = "util";
/// Value written after `//# sourceURL=` in the script that `FrameManager`
/// registers when it sets up a new isolated world.
const EVALUATION_SCRIPT_URL: &str = "app.js";
27
28/// Represents a frame on the page
29#[derive(Debug)]
30pub struct Frame {
31    parent_frame: Option<FrameId>,
32    /// Cdp identifier of this frame
33    id: FrameId,
34    main_world: DOMWorld,
35    secondary_world: DOMWorld,
36    loader_id: Option<LoaderId>,
37    /// Current url of this frame
38    url: Option<String>,
39    /// The http request that loaded this with this frame
40    http_request: ArcHttpRequest,
41    /// The frames contained in this frame
42    child_frames: HashSet<FrameId>,
43    name: Option<String>,
44    /// The received lifecycle events
45    lifecycle_events: HashSet<MethodId>,
46}
47
48impl Frame {
49    pub fn new(id: FrameId) -> Self {
50        Self {
51            parent_frame: None,
52            id,
53            main_world: Default::default(),
54            secondary_world: Default::default(),
55            loader_id: None,
56            url: None,
57            http_request: None,
58            child_frames: Default::default(),
59            name: None,
60            lifecycle_events: Default::default(),
61        }
62    }
63
64    pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
65        parent.child_frames.insert(id.clone());
66        Self {
67            parent_frame: Some(parent.id.clone()),
68            id,
69            main_world: Default::default(),
70            secondary_world: Default::default(),
71            loader_id: None,
72            url: None,
73            http_request: None,
74            child_frames: Default::default(),
75            name: None,
76            lifecycle_events: Default::default(),
77        }
78    }
79
80    pub fn parent_id(&self) -> Option<&FrameId> {
81        self.parent_frame.as_ref()
82    }
83
84    pub fn id(&self) -> &FrameId {
85        &self.id
86    }
87
88    pub fn url(&self) -> Option<&str> {
89        self.url.as_deref()
90    }
91
92    pub fn name(&self) -> Option<&str> {
93        self.name.as_deref()
94    }
95
96    pub fn main_world(&self) -> &DOMWorld {
97        &self.main_world
98    }
99
100    pub fn secondary_world(&self) -> &DOMWorld {
101        &self.secondary_world
102    }
103
104    pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
105        &self.lifecycle_events
106    }
107
108    pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
109        self.http_request.as_ref()
110    }
111
112    fn navigated(&mut self, frame: &CdpFrame) {
113        self.name.clone_from(&frame.name);
114        let url = if let Some(ref fragment) = frame.url_fragment {
115            format!("{}{fragment}", frame.url)
116        } else {
117            frame.url.clone()
118        };
119        self.url = Some(url);
120    }
121
122    fn navigated_within_url(&mut self, url: String) {
123        self.url = Some(url)
124    }
125
126    fn on_loading_stopped(&mut self) {
127        self.lifecycle_events.insert("DOMContentLoaded".into());
128        self.lifecycle_events.insert("load".into());
129    }
130
131    fn on_loading_started(&mut self) {
132        self.lifecycle_events.clear();
133        self.http_request.take();
134    }
135
136    pub fn is_loaded(&self) -> bool {
137        self.lifecycle_events.contains("load")
138    }
139
140    pub fn clear_contexts(&mut self) {
141        self.main_world.take_context();
142        self.secondary_world.take_context();
143    }
144
145    pub fn destroy_context(&mut self, ctx_unique_id: &str) {
146        if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
147            self.main_world.take_context();
148        } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
149            self.secondary_world.take_context();
150        }
151    }
152
153    pub fn execution_context(&self) -> Option<ExecutionContextId> {
154        self.main_world.execution_context()
155    }
156
157    pub fn set_request(&mut self, request: HttpRequest) {
158        self.http_request = Some(Arc::new(request))
159    }
160}
161
162impl From<CdpFrame> for Frame {
163    fn from(frame: CdpFrame) -> Self {
164        Self {
165            parent_frame: frame.parent_id,
166            id: frame.id,
167            main_world: Default::default(),
168            secondary_world: Default::default(),
169            loader_id: Some(frame.loader_id),
170            url: Some(frame.url),
171            http_request: None,
172            child_frames: Default::default(),
173            name: frame.name,
174            lifecycle_events: Default::default(),
175        }
176    }
177}
178
179/// Maintains the state of the pages frame and listens to events produced by
180/// chromium targeting the `Target`. Also listens for events that indicate that
181/// a navigation was completed
182#[derive(Debug)]
183pub struct FrameManager {
184    main_frame: Option<FrameId>,
185    frames: HashMap<FrameId, Frame>,
186    /// The contexts mapped with their frames
187    context_ids: HashMap<String, FrameId>,
188    isolated_worlds: HashSet<String>,
189    /// Timeout after which an anticipated event (related to navigation) doesn't
190    /// arrive results in an error
191    request_timeout: Duration,
192    /// Track currently in progress navigation
193    pending_navigations: VecDeque<(FrameNavigationRequest, NavigationWatcher)>,
194    /// The currently ongoing navigation
195    navigation: Option<(NavigationWatcher, Instant)>,
196}
197
198impl FrameManager {
199    pub fn new(request_timeout: Duration) -> Self {
200        FrameManager {
201            main_frame: None,
202            frames: Default::default(),
203            context_ids: Default::default(),
204            isolated_worlds: Default::default(),
205            request_timeout,
206            pending_navigations: Default::default(),
207            navigation: None,
208        }
209    }
210
211    /// The commands to execute in order to initialize this frame manager
212    pub fn init_commands(timeout: Duration) -> CommandChain {
213        let enable = page::EnableParams::default();
214        let get_tree = page::GetFrameTreeParams::default();
215        let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
216
217        // chaser-oxide Stealth: We do NOT enable Runtime here.
218        // Context IDs are obtained via Page.createIsolatedWorld on-demand.
219
220        CommandChain::new(
221            vec![
222                (enable.identifier(), serde_json::to_value(enable).unwrap()),
223                (
224                    get_tree.identifier(),
225                    serde_json::to_value(get_tree).unwrap(),
226                ),
227                (
228                    set_lifecycle.identifier(),
229                    serde_json::to_value(set_lifecycle).unwrap(),
230                ),
231            ],
232            timeout,
233        )
234    }
235
236    pub fn main_frame(&self) -> Option<&Frame> {
237        self.main_frame.as_ref().and_then(|id| self.frames.get(id))
238    }
239
240    pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
241        if let Some(id) = self.main_frame.as_ref() {
242            self.frames.get_mut(id)
243        } else {
244            None
245        }
246    }
247
248    pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
249        self.frames.values()
250    }
251
252    pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
253        self.frames.get(id)
254    }
255
256    fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
257        watcher.expected_lifecycle.iter().all(|ev| {
258            frame.lifecycle_events.contains(ev)
259                || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
260        }) && frame
261            .child_frames
262            .iter()
263            .filter_map(|f| self.frames.get(f))
264            .all(|f| self.check_lifecycle(watcher, f))
265    }
266
267    fn check_lifecycle_complete(
268        &self,
269        watcher: &NavigationWatcher,
270        frame: &Frame,
271    ) -> Option<NavigationOk> {
272        if !self.check_lifecycle(watcher, frame) {
273            return None;
274        }
275        if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
276            return None;
277        }
278        if watcher.same_document_navigation {
279            return Some(NavigationOk::SameDocumentNavigation(watcher.id));
280        }
281        if frame.loader_id != watcher.loader_id {
282            return Some(NavigationOk::NewDocumentNavigation(watcher.id));
283        }
284        None
285    }
286
287    /// Track the request in the frame
288    pub fn on_http_request_finished(&mut self, request: HttpRequest) {
289        if let Some(id) = request.frame.as_ref() {
290            if let Some(frame) = self.frames.get_mut(id) {
291                frame.set_request(request);
292            }
293        }
294    }
295
296    pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
297        // check if the navigation completed
298        if let Some((watcher, deadline)) = self.navigation.take() {
299            if now > deadline {
300                // navigation request timed out
301                return Some(FrameEvent::NavigationResult(Err(
302                    NavigationError::Timeout {
303                        err: DeadlineExceeded::new(now, deadline),
304                        id: watcher.id,
305                    },
306                )));
307            }
308            if let Some(frame) = self.frames.get(&watcher.frame_id) {
309                if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
310                    // request is complete if the frame's lifecycle is complete = frame received all
311                    // required events
312                    return Some(FrameEvent::NavigationResult(Ok(nav)));
313                } else {
314                    // not finished yet
315                    self.navigation = Some((watcher, deadline));
316                }
317            } else {
318                return Some(FrameEvent::NavigationResult(Err(
319                    NavigationError::FrameNotFound {
320                        frame: watcher.frame_id,
321                        id: watcher.id,
322                    },
323                )));
324            }
325        } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
326            // queue in the next navigation that is must be fulfilled until `deadline`
327            let deadline = Instant::now() + req.timeout;
328            self.navigation = Some((watcher, deadline));
329            return Some(FrameEvent::NavigationRequest(req.id, req.req));
330        }
331        None
332    }
333
334    /// Entrypoint for page navigation
335    pub fn goto(&mut self, req: FrameNavigationRequest) {
336        if let Some(frame_id) = self.main_frame.clone() {
337            self.navigate_frame(frame_id, req);
338        }
339    }
340
341    /// Navigate a specific frame
342    pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameNavigationRequest) {
343        let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
344        let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
345        // insert the frame_id in the request if not present
346        req.set_frame_id(frame_id);
347        self.pending_navigations.push_back((req, watcher))
348    }
349
350    /// Fired when a frame moved to another session
351    pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
352        // _onFrameMoved
353    }
354
355    pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
356        self.on_frame_attached(
357            frame_tree.frame.id.clone(),
358            frame_tree.frame.parent_id.clone(),
359        );
360        self.on_frame_navigated(&frame_tree.frame);
361        if let Some(children) = frame_tree.child_frames {
362            for child_tree in children {
363                self.on_frame_tree(child_tree);
364            }
365        }
366    }
367
368    pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
369        if self.frames.contains_key(&frame_id) {
370            return;
371        }
372        if let Some(parent_frame_id) = parent_frame_id {
373            if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
374                let frame = Frame::with_parent(frame_id.clone(), parent_frame);
375                self.frames.insert(frame_id, frame);
376            }
377        }
378    }
379
380    pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
381        self.remove_frames_recursively(&event.frame_id);
382    }
383
384    pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
385        if frame.parent_id.is_some() {
386            if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
387                for child in &f.child_frames {
388                    self.remove_frames_recursively(child);
389                }
390                // this is necessary since we can't borrow mut and then remove recursively
391                f.child_frames.clear();
392                f.navigated(frame);
393                self.frames.insert(id, f);
394            }
395        } else {
396            let mut f = if let Some(main) = self.main_frame.take() {
397                // update main frame
398                let mut main_frame = self.frames.remove(&main).expect("Main frame is tracked.");
399                for child in &main_frame.child_frames {
400                    self.remove_frames_recursively(child);
401                }
402                // this is necessary since we can't borrow mut and then remove recursively
403                main_frame.child_frames.clear();
404                main_frame.id = frame.id.clone();
405                main_frame
406            } else {
407                // initial main frame navigation
408                Frame::new(frame.id.clone())
409            };
410            f.navigated(frame);
411            self.main_frame = Some(f.id.clone());
412            self.frames.insert(f.id.clone(), f);
413        }
414    }
415
416    pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
417        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
418            frame.navigated_within_url(event.url.clone());
419        }
420        if let Some((watcher, _)) = self.navigation.as_mut() {
421            watcher.on_frame_navigated_within_document(event);
422        }
423    }
424
425    pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
426        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
427            frame.on_loading_stopped();
428        }
429    }
430
431    /// Fired when frame has started loading.
432    pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
433        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
434            frame.on_loading_started();
435        }
436    }
437
438    /// Notification is issued every time when binding is called
439    pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
440
441    /// Issued when new execution context is created
442    pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
443        if let Some(frame_id) = event
444            .context
445            .aux_data
446            .as_ref()
447            .and_then(|v| v["frameId"].as_str())
448        {
449            if let Some(frame) = self.frames.get_mut(frame_id) {
450                if event
451                    .context
452                    .aux_data
453                    .as_ref()
454                    .and_then(|v| v["isDefault"].as_bool())
455                    .unwrap_or_default()
456                {
457                    frame
458                        .main_world
459                        .set_context(event.context.id, event.context.unique_id.clone());
460                } else if event.context.name == UTILITY_WORLD_NAME
461                    && frame.secondary_world.execution_context().is_none()
462                {
463                    frame
464                        .secondary_world
465                        .set_context(event.context.id, event.context.unique_id.clone());
466                }
467                self.context_ids
468                    .insert(event.context.unique_id.clone(), frame.id.clone());
469            }
470        }
471        if event
472            .context
473            .aux_data
474            .as_ref()
475            .filter(|v| v["type"].as_str() == Some("isolated"))
476            .is_some()
477        {
478            self.isolated_worlds.insert(event.context.name.clone());
479        }
480    }
481
482    /// Issued when execution context is destroyed
483    pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
484        if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
485            if let Some(frame) = self.frames.get_mut(&id) {
486                frame.destroy_context(&event.execution_context_unique_id);
487            }
488        }
489    }
490
491    /// Issued when all executionContexts were cleared
492    pub fn on_execution_contexts_cleared(&mut self) {
493        for id in self.context_ids.values() {
494            if let Some(frame) = self.frames.get_mut(id) {
495                frame.clear_contexts();
496            }
497        }
498        self.context_ids.clear()
499    }
500
501    /// Fired for top level page lifecycle events (nav, load, paint, etc.)
502    pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
503        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
504            if event.name == "init" {
505                frame.loader_id = Some(event.loader_id.clone());
506                frame.lifecycle_events.clear();
507            }
508            frame.lifecycle_events.insert(event.name.clone().into());
509        }
510    }
511
512    /// Detach all child frames
513    fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
514        if let Some(mut frame) = self.frames.remove(id) {
515            for child in &frame.child_frames {
516                self.remove_frames_recursively(child);
517            }
518            if let Some(parent_id) = frame.parent_frame.take() {
519                if let Some(parent) = self.frames.get_mut(&parent_id) {
520                    parent.child_frames.remove(&frame.id);
521                }
522            }
523            Some(frame)
524        } else {
525            None
526        }
527    }
528
529    pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
530        if self.isolated_worlds.contains(world_name) {
531            return None;
532        }
533        self.isolated_worlds.insert(world_name.to_string());
534        let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
535            .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
536            .world_name(world_name)
537            .build()
538            .unwrap();
539
540        let mut cmds = Vec::with_capacity(self.frames.len() + 1);
541
542        cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
543
544        cmds.extend(self.frames.keys().map(|id| {
545            let cmd = CreateIsolatedWorldParams::builder()
546                .frame_id(id.clone())
547                .grant_univeral_access(true)
548                .world_name(world_name)
549                .build()
550                .unwrap();
551            (cmd.identifier(), serde_json::to_value(cmd).unwrap())
552        }));
553        Some(CommandChain::new(cmds, self.request_timeout))
554    }
555}
556
557#[derive(Debug)]
558pub enum FrameEvent {
559    /// A previously submitted navigation has finished
560    NavigationResult(Result<NavigationOk, NavigationError>),
561    /// A new navigation request needs to be submitted
562    NavigationRequest(NavigationId, Request),
563    /* /// The initial page of the target has been loaded
564     * InitialPageLoadFinished */
565}
566
567#[derive(Debug)]
568pub enum NavigationError {
569    Timeout {
570        id: NavigationId,
571        err: DeadlineExceeded,
572    },
573    FrameNotFound {
574        id: NavigationId,
575        frame: FrameId,
576    },
577}
578
579impl NavigationError {
580    pub fn navigation_id(&self) -> &NavigationId {
581        match self {
582            NavigationError::Timeout { id, .. } => id,
583            NavigationError::FrameNotFound { id, .. } => id,
584        }
585    }
586}
587
588#[derive(Debug, Clone, Eq, PartialEq)]
589pub enum NavigationOk {
590    SameDocumentNavigation(NavigationId),
591    NewDocumentNavigation(NavigationId),
592}
593
594impl NavigationOk {
595    pub fn navigation_id(&self) -> &NavigationId {
596        match self {
597            NavigationOk::SameDocumentNavigation(id) => id,
598            NavigationOk::NewDocumentNavigation(id) => id,
599        }
600    }
601}
602
603/// Tracks the progress of an issued `Page.navigate` request until completion.
604#[derive(Debug)]
605pub struct NavigationWatcher {
606    id: NavigationId,
607    expected_lifecycle: HashSet<MethodId>,
608    frame_id: FrameId,
609    loader_id: Option<LoaderId>,
610    /// Once we receive the response to the issued `Page.navigate` request we
611    /// can detect whether we were navigating withing the same document or were
612    /// navigating to a new document by checking if a loader was included in the
613    /// response.
614    same_document_navigation: bool,
615}
616
617impl NavigationWatcher {
618    pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
619        Self {
620            id,
621            expected_lifecycle: std::iter::once("load".into()).collect(),
622            loader_id,
623            frame_id: frame,
624            same_document_navigation: false,
625        }
626    }
627
628    /// Checks whether the navigation was completed
629    pub fn is_lifecycle_complete(&self) -> bool {
630        self.expected_lifecycle.is_empty()
631    }
632
633    fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
634        if self.frame_id == ev.frame_id {
635            self.same_document_navigation = true;
636        }
637    }
638}
639
/// Identifies a navigation from the moment it is queued until its result is
/// reported.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
643
644/// Represents a the request for a navigation
645#[derive(Debug)]
646pub struct FrameNavigationRequest {
647    /// The internal identifier
648    pub id: NavigationId,
649    /// the cdp request that will trigger the navigation
650    pub req: Request,
651    /// The timeout after which the request will be considered timed out
652    pub timeout: Duration,
653}
654
655impl FrameNavigationRequest {
656    pub fn new(id: NavigationId, req: Request) -> Self {
657        Self {
658            id,
659            req,
660            timeout: Duration::from_millis(REQUEST_TIMEOUT),
661        }
662    }
663
664    /// This will set the id of the frame into the `params` `frameId` field.
665    pub fn set_frame_id(&mut self, frame_id: FrameId) {
666        if let Some(params) = self.req.params.as_object_mut() {
667            if let Entry::Vacant(entry) = params.entry("frameId") {
668                entry.insert(serde_json::Value::String(frame_id.into()));
669            }
670        }
671    }
672}
673
/// Page lifecycle events that can be referred to by name; `Load` is the
/// default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// The `load` event
    #[default]
    Load,
    /// The `DOMContentLoaded` event
    DomcontentLoaded,
    /// The `networkIdle` event
    NetworkIdle,
    /// The `networkAlmostIdle` event
    NetworkAlmostIdle,
}

/// Yields the event name as a string.
impl AsRef<str> for LifecycleEvent {
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}