chromiumoxide/handler/
frame.rs

1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10    AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11    EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12    EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17    browser_protocol::page::{self, FrameId},
18    js_protocol::runtime,
19};
20use chromiumoxide_types::{Method, MethodId, Request};
21
22use crate::error::DeadlineExceeded;
23use crate::handler::domworld::DOMWorld;
24use crate::handler::http::HttpRequest;
25use crate::handler::REQUEST_TIMEOUT;
26use crate::{cmd::CommandChain, ArcHttpRequest};
27
/// Name of the utility world. An execution context whose name equals this
/// value is bound to a frame's secondary world when it is created.
pub const UTILITY_WORLD_NAME: &str = "__chromiumoxide_utility_world__";
/// Value used for the `//# sourceURL=` marker of the script that is registered
/// when an isolated world is set up.
const EVALUATION_SCRIPT_URL: &str = "____chromiumoxide_utility_world___evaluation_script__";
30
/// Represents a frame on the page
#[derive(Debug)]
pub struct Frame {
    /// Id of the parent frame, `None` if no parent is known for this frame
    parent_frame: Option<FrameId>,
    /// Cdp identifier of this frame
    id: FrameId,
    /// World backed by the frame's default execution context
    main_world: DOMWorld,
    /// World backed by the execution context named [`UTILITY_WORLD_NAME`]
    secondary_world: DOMWorld,
    /// Loader id taken from the cdp frame or from the last `init` lifecycle
    /// event
    loader_id: Option<LoaderId>,
    /// Current url of this frame
    url: Option<String>,
    /// The http request most recently recorded for this frame
    http_request: ArcHttpRequest,
    /// The frames contained in this frame
    child_frames: HashSet<FrameId>,
    /// Name of the frame as reported by the last navigation
    name: Option<String>,
    /// The received lifecycle events
    lifecycle_events: HashSet<MethodId>,
}
50
51impl Frame {
52    pub fn new(id: FrameId) -> Self {
53        Self {
54            parent_frame: None,
55            id,
56            main_world: Default::default(),
57            secondary_world: Default::default(),
58            loader_id: None,
59            url: None,
60            http_request: None,
61            child_frames: Default::default(),
62            name: None,
63            lifecycle_events: Default::default(),
64        }
65    }
66
67    pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
68        parent.child_frames.insert(id.clone());
69        Self {
70            parent_frame: Some(parent.id.clone()),
71            id,
72            main_world: Default::default(),
73            secondary_world: Default::default(),
74            loader_id: None,
75            url: None,
76            http_request: None,
77            child_frames: Default::default(),
78            name: None,
79            lifecycle_events: Default::default(),
80        }
81    }
82
83    pub fn parent_id(&self) -> Option<&FrameId> {
84        self.parent_frame.as_ref()
85    }
86
87    pub fn id(&self) -> &FrameId {
88        &self.id
89    }
90
91    pub fn url(&self) -> Option<&str> {
92        self.url.as_deref()
93    }
94
95    pub fn name(&self) -> Option<&str> {
96        self.name.as_deref()
97    }
98
99    pub fn main_world(&self) -> &DOMWorld {
100        &self.main_world
101    }
102
103    pub fn secondary_world(&self) -> &DOMWorld {
104        &self.secondary_world
105    }
106
107    pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
108        &self.lifecycle_events
109    }
110
111    pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
112        self.http_request.as_ref()
113    }
114
115    fn navigated(&mut self, frame: &CdpFrame) {
116        self.name.clone_from(&frame.name);
117        let url = if let Some(ref fragment) = frame.url_fragment {
118            format!("{}{fragment}", frame.url)
119        } else {
120            frame.url.clone()
121        };
122        self.url = Some(url);
123    }
124
125    fn navigated_within_url(&mut self, url: String) {
126        self.url = Some(url)
127    }
128
129    fn on_loading_stopped(&mut self) {
130        self.lifecycle_events.insert("DOMContentLoaded".into());
131        self.lifecycle_events.insert("load".into());
132    }
133
134    fn on_loading_started(&mut self) {
135        self.lifecycle_events.clear();
136        self.http_request.take();
137    }
138
139    pub fn is_loaded(&self) -> bool {
140        self.lifecycle_events.contains("load")
141    }
142
143    pub fn clear_contexts(&mut self) {
144        self.main_world.take_context();
145        self.secondary_world.take_context();
146    }
147
148    pub fn destroy_context(&mut self, ctx_unique_id: &str) {
149        if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
150            self.main_world.take_context();
151        } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
152            self.secondary_world.take_context();
153        }
154    }
155
156    pub fn execution_context(&self) -> Option<ExecutionContextId> {
157        self.main_world.execution_context()
158    }
159
160    pub fn set_request(&mut self, request: HttpRequest) {
161        self.http_request = Some(Arc::new(request))
162    }
163}
164
165impl From<CdpFrame> for Frame {
166    fn from(frame: CdpFrame) -> Self {
167        Self {
168            parent_frame: frame.parent_id.map(From::from),
169            id: frame.id,
170            main_world: Default::default(),
171            secondary_world: Default::default(),
172            loader_id: Some(frame.loader_id),
173            url: Some(frame.url),
174            http_request: None,
175            child_frames: Default::default(),
176            name: frame.name,
177            lifecycle_events: Default::default(),
178        }
179    }
180}
181
/// Maintains the state of the page's frames and listens to events produced by
/// chromium targeting the `Target`. Also listens for events that indicate that
/// a navigation was completed
#[derive(Debug)]
pub struct FrameManager {
    /// Id of the current main frame, set once a frame without parent navigated
    main_frame: Option<FrameId>,
    /// All known frames by their id
    frames: HashMap<FrameId, Frame>,
    /// The unique ids of the execution contexts mapped to their frames
    context_ids: HashMap<String, FrameId>,
    /// Names of the isolated worlds that were seen or requested so far
    isolated_worlds: HashSet<String>,
    /// Timeout handed to the command chain that sets up an isolated world.
    /// NOTE(review): navigation deadlines come from the individual navigation
    /// request, not from this value - confirm whether other users exist.
    request_timeout: Duration,
    /// Navigations that were requested but not started yet, in request order
    pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
    /// The currently ongoing navigation together with its deadline
    navigation: Option<(NavigationWatcher, Instant)>,
}
200
201impl FrameManager {
202    pub fn new(request_timeout: Duration) -> Self {
203        FrameManager {
204            main_frame: None,
205            frames: Default::default(),
206            context_ids: Default::default(),
207            isolated_worlds: Default::default(),
208            request_timeout,
209            pending_navigations: Default::default(),
210            navigation: None,
211        }
212    }
213
214    /// The commands to execute in order to initialize this frame manager
215    pub fn init_commands(timeout: Duration) -> CommandChain {
216        let enable = page::EnableParams::default();
217        let get_tree = page::GetFrameTreeParams::default();
218        let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
219        let enable_runtime = runtime::EnableParams::default();
220        let mut commands = Vec::with_capacity(4);
221
222        let enable_id = enable.identifier();
223        let get_tree_id = get_tree.identifier();
224        let set_lifecycle_id = set_lifecycle.identifier();
225        let enable_runtime_id = enable_runtime.identifier();
226
227        if let Ok(value) = serde_json::to_value(enable) {
228            commands.push((enable_id, value));
229        }
230
231        if let Ok(value) = serde_json::to_value(get_tree) {
232            commands.push((get_tree_id, value));
233        }
234
235        if let Ok(value) = serde_json::to_value(set_lifecycle) {
236            commands.push((set_lifecycle_id, value));
237        }
238
239        if let Ok(value) = serde_json::to_value(enable_runtime) {
240            commands.push((enable_runtime_id, value));
241        }
242
243        CommandChain::new(commands, timeout)
244    }
245
246    pub fn main_frame(&self) -> Option<&Frame> {
247        self.main_frame.as_ref().and_then(|id| self.frames.get(id))
248    }
249
250    pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
251        if let Some(id) = self.main_frame.as_ref() {
252            self.frames.get_mut(id)
253        } else {
254            None
255        }
256    }
257
258    pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
259        self.frames.values()
260    }
261
262    pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
263        self.frames.get(id)
264    }
265
266    fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
267        watcher.expected_lifecycle.iter().all(|ev| {
268            frame.lifecycle_events.contains(ev)
269                || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
270        }) && frame
271            .child_frames
272            .iter()
273            .filter_map(|f| self.frames.get(f))
274            .all(|f| self.check_lifecycle(watcher, f))
275    }
276
277    fn check_lifecycle_complete(
278        &self,
279        watcher: &NavigationWatcher,
280        frame: &Frame,
281    ) -> Option<NavigationOk> {
282        if !self.check_lifecycle(watcher, frame) {
283            return None;
284        }
285        if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
286            return None;
287        }
288        if watcher.same_document_navigation {
289            return Some(NavigationOk::SameDocumentNavigation(watcher.id));
290        }
291        if frame.loader_id != watcher.loader_id {
292            return Some(NavigationOk::NewDocumentNavigation(watcher.id));
293        }
294        None
295    }
296
297    /// Track the request in the frame
298    pub fn on_http_request_finished(&mut self, request: HttpRequest) {
299        if let Some(id) = request.frame.as_ref() {
300            if let Some(frame) = self.frames.get_mut(id) {
301                frame.set_request(request);
302            }
303        }
304    }
305
306    pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
307        // check if the navigation completed
308        if let Some((watcher, deadline)) = self.navigation.take() {
309            if now > deadline {
310                // navigation request timed out
311                return Some(FrameEvent::NavigationResult(Err(
312                    NavigationError::Timeout {
313                        err: DeadlineExceeded::new(now, deadline),
314                        id: watcher.id,
315                    },
316                )));
317            }
318            if let Some(frame) = self.frames.get(&watcher.frame_id) {
319                if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
320                    // request is complete if the frame's lifecycle is complete = frame received all
321                    // required events
322                    return Some(FrameEvent::NavigationResult(Ok(nav)));
323                } else {
324                    // not finished yet
325                    self.navigation = Some((watcher, deadline));
326                }
327            } else {
328                return Some(FrameEvent::NavigationResult(Err(
329                    NavigationError::FrameNotFound {
330                        frame: watcher.frame_id,
331                        id: watcher.id,
332                    },
333                )));
334            }
335        } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
336            // queue in the next navigation that is must be fulfilled until `deadline`
337            let deadline = Instant::now() + req.timeout;
338            self.navigation = Some((watcher, deadline));
339            return Some(FrameEvent::NavigationRequest(req.id, req.req));
340        }
341        None
342    }
343
344    /// Entrypoint for page navigation
345    pub fn goto(&mut self, req: FrameRequestedNavigation) {
346        if let Some(frame_id) = self.main_frame.clone() {
347            self.navigate_frame(frame_id, req);
348        }
349    }
350
351    /// Navigate a specific frame
352    pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
353        let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
354        let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
355        // insert the frame_id in the request if not present
356        req.set_frame_id(frame_id);
357        self.pending_navigations.push_back((req, watcher))
358    }
359
    /// Fired when a frame moved to another session.
    ///
    /// Currently a no-op: the event is ignored.
    pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
        // _onFrameMoved (not implemented)
    }
364
365    pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
366        self.on_frame_attached(
367            frame_tree.frame.id.clone(),
368            frame_tree.frame.parent_id.clone().map(Into::into),
369        );
370        self.on_frame_navigated(&frame_tree.frame);
371        if let Some(children) = frame_tree.child_frames {
372            for child_tree in children {
373                self.on_frame_tree(child_tree);
374            }
375        }
376    }
377
378    pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
379        if self.frames.contains_key(&frame_id) {
380            return;
381        }
382        if let Some(parent_frame_id) = parent_frame_id {
383            if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
384                let frame = Frame::with_parent(frame_id.clone(), parent_frame);
385                self.frames.insert(frame_id, frame);
386            }
387        }
388    }
389
    /// Stops tracking the detached frame and all frames nested in it.
    pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
        self.remove_frames_recursively(&event.frame_id);
    }
393
394    pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
395        if frame.parent_id.is_some() {
396            if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
397                for child in f.child_frames.drain() {
398                    self.remove_frames_recursively(&child);
399                }
400                f.navigated(frame);
401                self.frames.insert(id, f);
402            }
403        } else {
404            let mut f = if let Some(main) = self.main_frame.take() {
405                // update main frame
406                if let Some(mut main_frame) = self.frames.remove(&main) {
407                    for child in &main_frame.child_frames {
408                        self.remove_frames_recursively(child);
409                    }
410                    // this is necessary since we can't borrow mut and then remove recursively
411                    main_frame.child_frames.clear();
412                    main_frame.id = frame.id.clone();
413                    main_frame
414                } else {
415                    Frame::new(frame.id.clone())
416                }
417            } else {
418                // initial main frame navigation
419                Frame::new(frame.id.clone())
420            };
421            f.navigated(frame);
422            self.main_frame = Some(f.id.clone());
423            self.frames.insert(f.id.clone(), f);
424        }
425    }
426
427    pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
428        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
429            frame.navigated_within_url(event.url.clone());
430        }
431        if let Some((watcher, _)) = self.navigation.as_mut() {
432            watcher.on_frame_navigated_within_document(event);
433        }
434    }
435
436    pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
437        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
438            frame.on_loading_stopped();
439        }
440    }
441
442    /// Fired when frame has started loading.
443    pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
444        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
445            frame.on_loading_started();
446        }
447    }
448
    /// Notification is issued every time when binding is called.
    ///
    /// Currently a no-op: the event is ignored.
    pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
451
452    /// Issued when new execution context is created
453    pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
454        if let Some(frame_id) = event
455            .context
456            .aux_data
457            .as_ref()
458            .and_then(|v| v["frameId"].as_str())
459        {
460            if let Some(frame) = self.frames.get_mut(frame_id) {
461                if event
462                    .context
463                    .aux_data
464                    .as_ref()
465                    .and_then(|v| v["isDefault"].as_bool())
466                    .unwrap_or_default()
467                {
468                    frame
469                        .main_world
470                        .set_context(event.context.id, event.context.unique_id.clone());
471                } else if event.context.name == UTILITY_WORLD_NAME
472                    && frame.secondary_world.execution_context().is_none()
473                {
474                    frame
475                        .secondary_world
476                        .set_context(event.context.id, event.context.unique_id.clone());
477                }
478                self.context_ids
479                    .insert(event.context.unique_id.clone(), frame.id.clone());
480            }
481        }
482        if event
483            .context
484            .aux_data
485            .as_ref()
486            .filter(|v| v["type"].as_str() == Some("isolated"))
487            .is_some()
488        {
489            self.isolated_worlds.insert(event.context.name.clone());
490        }
491    }
492
493    /// Issued when execution context is destroyed
494    pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
495        if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
496            if let Some(frame) = self.frames.get_mut(&id) {
497                frame.destroy_context(&event.execution_context_unique_id);
498            }
499        }
500    }
501
502    /// Issued when all executionContexts were cleared
503    pub fn on_execution_contexts_cleared(&mut self) {
504        for id in self.context_ids.values() {
505            if let Some(frame) = self.frames.get_mut(id) {
506                frame.clear_contexts();
507            }
508        }
509        self.context_ids.clear()
510    }
511
512    /// Fired for top level page lifecycle events (nav, load, paint, etc.)
513    pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
514        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
515            if event.name == "init" {
516                frame.loader_id = Some(event.loader_id.clone());
517                frame.lifecycle_events.clear();
518            }
519            frame.lifecycle_events.insert(event.name.clone().into());
520        }
521    }
522
523    /// Detach all child frames
524    fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
525        if let Some(mut frame) = self.frames.remove(id) {
526            for child in &frame.child_frames {
527                self.remove_frames_recursively(child);
528            }
529            if let Some(parent_id) = frame.parent_frame.take() {
530                if let Some(parent) = self.frames.get_mut(&parent_id) {
531                    parent.child_frames.remove(&frame.id);
532                }
533            }
534            Some(frame)
535        } else {
536            None
537        }
538    }
539
540    pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
541        if self.isolated_worlds.contains(world_name) {
542            return None;
543        }
544
545        self.isolated_worlds.insert(world_name.to_string());
546
547        let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
548            .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
549            .world_name(world_name)
550            .build()
551            .unwrap();
552
553        let mut cmds = Vec::with_capacity(self.frames.len() + 1);
554
555        cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
556
557        let cm = self.frames.keys().filter_map(|id| {
558            if let Ok(cmd) = CreateIsolatedWorldParams::builder()
559                .frame_id(id.clone())
560                .grant_univeral_access(true)
561                .world_name(world_name)
562                .build()
563            {
564                let cm = (
565                    cmd.identifier(),
566                    serde_json::to_value(cmd).unwrap_or_default(),
567                );
568
569                Some(cm)
570            } else {
571                None
572            }
573        });
574
575        cmds.extend(cm);
576
577        Some(CommandChain::new(cmds, self.request_timeout))
578    }
579}
580
/// Events produced by polling the [`FrameManager`]
#[derive(Debug)]
pub enum FrameEvent {
    /// A previously submitted navigation has finished
    NavigationResult(Result<NavigationOk, NavigationError>),
    /// A new navigation request needs to be submitted
    NavigationRequest(NavigationId, Request),
    /* /// The initial page of the target has been loaded
     * InitialPageLoadFinished */
}
590
591#[derive(Debug)]
592pub enum NavigationError {
593    Timeout {
594        id: NavigationId,
595        err: DeadlineExceeded,
596    },
597    FrameNotFound {
598        id: NavigationId,
599        frame: FrameId,
600    },
601}
602
603impl NavigationError {
604    pub fn navigation_id(&self) -> &NavigationId {
605        match self {
606            NavigationError::Timeout { id, .. } => id,
607            NavigationError::FrameNotFound { id, .. } => id,
608        }
609    }
610}
611
612#[derive(Debug, Clone, Eq, PartialEq)]
613pub enum NavigationOk {
614    SameDocumentNavigation(NavigationId),
615    NewDocumentNavigation(NavigationId),
616}
617
618impl NavigationOk {
619    pub fn navigation_id(&self) -> &NavigationId {
620        match self {
621            NavigationOk::SameDocumentNavigation(id) => id,
622            NavigationOk::NewDocumentNavigation(id) => id,
623        }
624    }
625}
626
627/// Tracks the progress of an issued `Page.navigate` request until completion.
628#[derive(Debug)]
629pub struct NavigationWatcher {
630    id: NavigationId,
631    expected_lifecycle: HashSet<MethodId>,
632    frame_id: FrameId,
633    loader_id: Option<LoaderId>,
634    /// Once we receive the response to the issued `Page.navigate` request we
635    /// can detect whether we were navigating withing the same document or were
636    /// navigating to a new document by checking if a loader was included in the
637    /// response.
638    same_document_navigation: bool,
639}
640
641impl NavigationWatcher {
642    pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
643        Self {
644            id,
645            expected_lifecycle: std::iter::once("load".into()).collect(),
646            loader_id,
647            frame_id: frame,
648            same_document_navigation: false,
649        }
650    }
651
652    /// Checks whether the navigation was completed
653    pub fn is_lifecycle_complete(&self) -> bool {
654        self.expected_lifecycle.is_empty()
655    }
656
657    fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
658        if self.frame_id == ev.frame_id {
659            self.same_document_navigation = true;
660        }
661    }
662}
663
/// An identifier for an ongoing navigation.
///
/// It connects a navigation request with the result that is reported for it.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
667
668/// Represents a the request for a navigation
669#[derive(Debug)]
670pub struct FrameRequestedNavigation {
671    /// The internal identifier
672    pub id: NavigationId,
673    /// the cdp request that will trigger the navigation
674    pub req: Request,
675    /// The timeout after which the request will be considered timed out
676    pub timeout: Duration,
677}
678
679impl FrameRequestedNavigation {
680    pub fn new(id: NavigationId, req: Request) -> Self {
681        Self {
682            id,
683            req,
684            timeout: Duration::from_millis(REQUEST_TIMEOUT),
685        }
686    }
687
688    /// This will set the id of the frame into the `params` `frameId` field.
689    pub fn set_frame_id(&mut self, frame_id: FrameId) {
690        if let Some(params) = self.req.params.as_object_mut() {
691            if let Entry::Vacant(entry) = params.entry("frameId") {
692                entry.insert(serde_json::Value::String(frame_id.into()));
693            }
694        }
695    }
696}
697
/// The lifecycle events a navigation can wait for; defaults to `Load`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    #[default]
    Load,
    DomcontentLoaded,
    NetworkIdle,
    NetworkAlmostIdle,
}

impl AsRef<str> for LifecycleEvent {
    /// The name of the event as it is used for lifecycle events.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}