chromiumoxide/handler/
frame.rs

1use std::collections::VecDeque;
2use std::collections::{HashMap, HashSet};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use serde_json::map::Entry;
7
8use chromiumoxide_cdp::cdp::browser_protocol::network::LoaderId;
9use chromiumoxide_cdp::cdp::browser_protocol::page::{
10    AddScriptToEvaluateOnNewDocumentParams, CreateIsolatedWorldParams, EventFrameDetached,
11    EventFrameStartedLoading, EventFrameStoppedLoading, EventLifecycleEvent,
12    EventNavigatedWithinDocument, Frame as CdpFrame, FrameTree,
13};
14use chromiumoxide_cdp::cdp::browser_protocol::target::EventAttachedToTarget;
15use chromiumoxide_cdp::cdp::js_protocol::runtime::*;
16use chromiumoxide_cdp::cdp::{
17    browser_protocol::page::{self, FrameId},
18    js_protocol::runtime,
19};
20use chromiumoxide_types::{Method, MethodId, Request};
21
22use crate::error::DeadlineExceeded;
23use crate::handler::domworld::DOMWorld;
24use crate::handler::http::HttpRequest;
25use crate::handler::REQUEST_TIMEOUT;
26use crate::{cmd::CommandChain, ArcHttpRequest};
27
/// Name an execution context must carry to be recorded as the secondary world
/// of a frame.
pub const UTILITY_WORLD_NAME: &str = "__chromiumoxide_utility_world__";
/// Value of the `//# sourceURL=` marker of the script that is registered for
/// new documents when an isolated world is ensured.
const EVALUATION_SCRIPT_URL: &str = "____chromiumoxide_utility_world___evaluation_script__";
30
31/// Represents a frame on the page
32#[derive(Debug)]
33pub struct Frame {
34    parent_frame: Option<FrameId>,
35    /// Cdp identifier of this frame
36    id: FrameId,
37    main_world: DOMWorld,
38    secondary_world: DOMWorld,
39    loader_id: Option<LoaderId>,
40    /// Current url of this frame
41    url: Option<String>,
42    /// The http request that loaded this with this frame
43    http_request: ArcHttpRequest,
44    /// The frames contained in this frame
45    child_frames: HashSet<FrameId>,
46    name: Option<String>,
47    /// The received lifecycle events
48    lifecycle_events: HashSet<MethodId>,
49}
50
51impl Frame {
52    pub fn new(id: FrameId) -> Self {
53        Self {
54            parent_frame: None,
55            id,
56            main_world: Default::default(),
57            secondary_world: Default::default(),
58            loader_id: None,
59            url: None,
60            http_request: None,
61            child_frames: Default::default(),
62            name: None,
63            lifecycle_events: Default::default(),
64        }
65    }
66
67    pub fn with_parent(id: FrameId, parent: &mut Frame) -> Self {
68        parent.child_frames.insert(id.clone());
69        Self {
70            parent_frame: Some(parent.id.clone()),
71            id,
72            main_world: Default::default(),
73            secondary_world: Default::default(),
74            loader_id: None,
75            url: None,
76            http_request: None,
77            child_frames: Default::default(),
78            name: None,
79            lifecycle_events: Default::default(),
80        }
81    }
82
83    pub fn parent_id(&self) -> Option<&FrameId> {
84        self.parent_frame.as_ref()
85    }
86
87    pub fn id(&self) -> &FrameId {
88        &self.id
89    }
90
91    pub fn url(&self) -> Option<&str> {
92        self.url.as_deref()
93    }
94
95    pub fn name(&self) -> Option<&str> {
96        self.name.as_deref()
97    }
98
99    pub fn main_world(&self) -> &DOMWorld {
100        &self.main_world
101    }
102
103    pub fn secondary_world(&self) -> &DOMWorld {
104        &self.secondary_world
105    }
106
107    pub fn lifecycle_events(&self) -> &HashSet<MethodId> {
108        &self.lifecycle_events
109    }
110
111    pub fn http_request(&self) -> Option<&Arc<HttpRequest>> {
112        self.http_request.as_ref()
113    }
114
115    fn navigated(&mut self, frame: &CdpFrame) {
116        self.name.clone_from(&frame.name);
117        let url = if let Some(ref fragment) = frame.url_fragment {
118            format!("{}{fragment}", frame.url)
119        } else {
120            frame.url.clone()
121        };
122        self.url = Some(url);
123    }
124
125    fn navigated_within_url(&mut self, url: String) {
126        self.url = Some(url)
127    }
128
129    fn on_loading_stopped(&mut self) {
130        self.lifecycle_events.insert("DOMContentLoaded".into());
131        self.lifecycle_events.insert("load".into());
132    }
133
134    fn on_loading_started(&mut self) {
135        self.lifecycle_events.clear();
136        self.http_request.take();
137    }
138
139    pub fn is_loaded(&self) -> bool {
140        self.lifecycle_events.contains("load")
141    }
142
143    pub fn clear_contexts(&mut self) {
144        self.main_world.take_context();
145        self.secondary_world.take_context();
146    }
147
148    pub fn destroy_context(&mut self, ctx_unique_id: &str) {
149        if self.main_world.execution_context_unique_id() == Some(ctx_unique_id) {
150            self.main_world.take_context();
151        } else if self.secondary_world.execution_context_unique_id() == Some(ctx_unique_id) {
152            self.secondary_world.take_context();
153        }
154    }
155
156    pub fn execution_context(&self) -> Option<ExecutionContextId> {
157        self.main_world.execution_context()
158    }
159
160    pub fn set_request(&mut self, request: HttpRequest) {
161        self.http_request = Some(Arc::new(request))
162    }
163}
164
165impl From<CdpFrame> for Frame {
166    fn from(frame: CdpFrame) -> Self {
167        Self {
168            parent_frame: frame.parent_id.map(From::from),
169            id: frame.id,
170            main_world: Default::default(),
171            secondary_world: Default::default(),
172            loader_id: Some(frame.loader_id),
173            url: Some(frame.url),
174            http_request: None,
175            child_frames: Default::default(),
176            name: frame.name,
177            lifecycle_events: Default::default(),
178        }
179    }
180}
181
182/// Maintains the state of the pages frame and listens to events produced by
183/// chromium targeting the `Target`. Also listens for events that indicate that
184/// a navigation was completed
185#[derive(Debug)]
186pub struct FrameManager {
187    main_frame: Option<FrameId>,
188    frames: HashMap<FrameId, Frame>,
189    /// The contexts mapped with their frames
190    context_ids: HashMap<String, FrameId>,
191    isolated_worlds: HashSet<String>,
192    /// Timeout after which an anticipated event (related to navigation) doesn't
193    /// arrive results in an error
194    request_timeout: Duration,
195    /// Track currently in progress navigation
196    pending_navigations: VecDeque<(FrameRequestedNavigation, NavigationWatcher)>,
197    /// The currently ongoing navigation
198    navigation: Option<(NavigationWatcher, Instant)>,
199}
200
201impl FrameManager {
202    pub fn new(request_timeout: Duration) -> Self {
203        FrameManager {
204            main_frame: None,
205            frames: Default::default(),
206            context_ids: Default::default(),
207            isolated_worlds: Default::default(),
208            request_timeout,
209            pending_navigations: Default::default(),
210            navigation: None,
211        }
212    }
213
214    /// The commands to execute in order to initialize this frame manager
215    pub fn init_commands(timeout: Duration) -> CommandChain {
216        let enable = page::EnableParams::default();
217        let get_tree = page::GetFrameTreeParams::default();
218        let set_lifecycle = page::SetLifecycleEventsEnabledParams::new(true);
219        let enable_runtime = runtime::EnableParams::default();
220        let mut commands = Vec::with_capacity(4);
221
222        let enable_id = enable.identifier();
223        let get_tree_id = get_tree.identifier();
224        let set_lifecycle_id = set_lifecycle.identifier();
225        let enable_runtime_id = enable_runtime.identifier();
226
227        if let Ok(value) = serde_json::to_value(enable) {
228            commands.push((enable_id, value));
229        }
230
231        if let Ok(value) = serde_json::to_value(get_tree) {
232            commands.push((get_tree_id, value));
233        }
234
235        if let Ok(value) = serde_json::to_value(set_lifecycle) {
236            commands.push((set_lifecycle_id, value));
237        }
238
239        if let Ok(value) = serde_json::to_value(enable_runtime) {
240            commands.push((enable_runtime_id, value));
241        }
242
243        CommandChain::new(commands, timeout)
244    }
245
246    pub fn main_frame(&self) -> Option<&Frame> {
247        self.main_frame.as_ref().and_then(|id| self.frames.get(id))
248    }
249
250    pub fn main_frame_mut(&mut self) -> Option<&mut Frame> {
251        if let Some(id) = self.main_frame.as_ref() {
252            self.frames.get_mut(id)
253        } else {
254            None
255        }
256    }
257
258    pub fn frames(&self) -> impl Iterator<Item = &Frame> + '_ {
259        self.frames.values()
260    }
261
262    pub fn frame(&self, id: &FrameId) -> Option<&Frame> {
263        self.frames.get(id)
264    }
265
266    fn check_lifecycle(&self, watcher: &NavigationWatcher, frame: &Frame) -> bool {
267        watcher.expected_lifecycle.iter().all(|ev| {
268            frame.lifecycle_events.contains(ev)
269                || (frame.url.is_none() && frame.lifecycle_events.contains("DOMContentLoaded"))
270        }) && frame
271            .child_frames
272            .iter()
273            .filter_map(|f| self.frames.get(f))
274            .all(|f| self.check_lifecycle(watcher, f))
275    }
276
277    fn check_lifecycle_complete(
278        &self,
279        watcher: &NavigationWatcher,
280        frame: &Frame,
281    ) -> Option<NavigationOk> {
282        if !self.check_lifecycle(watcher, frame) {
283            return None;
284        }
285        if frame.loader_id == watcher.loader_id && !watcher.same_document_navigation {
286            return None;
287        }
288        if watcher.same_document_navigation {
289            return Some(NavigationOk::SameDocumentNavigation(watcher.id));
290        }
291        if frame.loader_id != watcher.loader_id {
292            return Some(NavigationOk::NewDocumentNavigation(watcher.id));
293        }
294        None
295    }
296
297    /// Track the request in the frame
298    pub fn on_http_request_finished(&mut self, request: HttpRequest) {
299        if let Some(id) = request.frame.as_ref() {
300            if let Some(frame) = self.frames.get_mut(id) {
301                frame.set_request(request);
302            }
303        }
304    }
305
306    pub fn poll(&mut self, now: Instant) -> Option<FrameEvent> {
307        // check if the navigation completed
308        if let Some((watcher, deadline)) = self.navigation.take() {
309            if now > deadline {
310                // navigation request timed out
311                return Some(FrameEvent::NavigationResult(Err(
312                    NavigationError::Timeout {
313                        err: DeadlineExceeded::new(now, deadline),
314                        id: watcher.id,
315                    },
316                )));
317            }
318
319            if let Some(frame) = self.frames.get(&watcher.frame_id) {
320                if let Some(nav) = self.check_lifecycle_complete(&watcher, frame) {
321                    // request is complete if the frame's lifecycle is complete = frame received all
322                    // required events
323                    return Some(FrameEvent::NavigationResult(Ok(nav)));
324                } else {
325                    // not finished yet
326                    self.navigation = Some((watcher, deadline));
327                }
328            } else {
329                return Some(FrameEvent::NavigationResult(Err(
330                    NavigationError::FrameNotFound {
331                        frame: watcher.frame_id,
332                        id: watcher.id,
333                    },
334                )));
335            }
336        } else if let Some((req, watcher)) = self.pending_navigations.pop_front() {
337            // queue in the next navigation that is must be fulfilled until `deadline`
338            let deadline = Instant::now() + req.timeout;
339            self.navigation = Some((watcher, deadline));
340            return Some(FrameEvent::NavigationRequest(req.id, req.req));
341        }
342        None
343    }
344
345    /// Entrypoint for page navigation
346    pub fn goto(&mut self, req: FrameRequestedNavigation) {
347        if let Some(frame_id) = &self.main_frame {
348            self.navigate_frame(frame_id.clone(), req);
349        }
350    }
351
352    /// Navigate a specific frame
353    pub fn navigate_frame(&mut self, frame_id: FrameId, mut req: FrameRequestedNavigation) {
354        let loader_id = self.frames.get(&frame_id).and_then(|f| f.loader_id.clone());
355        let watcher = NavigationWatcher::until_page_load(req.id, frame_id.clone(), loader_id);
356
357        // insert the frame_id in the request if not present
358        req.set_frame_id(frame_id);
359
360        self.pending_navigations.push_back((req, watcher))
361    }
362
363    /// Fired when a frame moved to another session
364    pub fn on_attached_to_target(&mut self, _event: &EventAttachedToTarget) {
365        // _onFrameMoved
366    }
367
368    pub fn on_frame_tree(&mut self, frame_tree: FrameTree) {
369        self.on_frame_attached(
370            frame_tree.frame.id.clone(),
371            frame_tree.frame.parent_id.clone().map(Into::into),
372        );
373        self.on_frame_navigated(&frame_tree.frame);
374        if let Some(children) = frame_tree.child_frames {
375            for child_tree in children {
376                self.on_frame_tree(child_tree);
377            }
378        }
379    }
380
381    pub fn on_frame_attached(&mut self, frame_id: FrameId, parent_frame_id: Option<FrameId>) {
382        if self.frames.contains_key(&frame_id) {
383            return;
384        }
385        if let Some(parent_frame_id) = parent_frame_id {
386            if let Some(parent_frame) = self.frames.get_mut(&parent_frame_id) {
387                let frame = Frame::with_parent(frame_id.clone(), parent_frame);
388                self.frames.insert(frame_id, frame);
389            }
390        }
391    }
392
393    pub fn on_frame_detached(&mut self, event: &EventFrameDetached) {
394        self.remove_frames_recursively(&event.frame_id);
395    }
396
397    pub fn on_frame_navigated(&mut self, frame: &CdpFrame) {
398        if frame.parent_id.is_some() {
399            if let Some((id, mut f)) = self.frames.remove_entry(&frame.id) {
400                for child in f.child_frames.drain() {
401                    self.remove_frames_recursively(&child);
402                }
403                f.navigated(frame);
404                self.frames.insert(id, f);
405            }
406        } else {
407            let mut f = if let Some(main) = self.main_frame.take() {
408                // update main frame
409                if let Some(mut main_frame) = self.frames.remove(&main) {
410                    for child in &main_frame.child_frames {
411                        self.remove_frames_recursively(child);
412                    }
413                    // this is necessary since we can't borrow mut and then remove recursively
414                    main_frame.child_frames.clear();
415                    main_frame.id = frame.id.clone();
416                    main_frame
417                } else {
418                    Frame::new(frame.id.clone())
419                }
420            } else {
421                // initial main frame navigation
422                Frame::new(frame.id.clone())
423            };
424            f.navigated(frame);
425            self.main_frame = Some(f.id.clone());
426            self.frames.insert(f.id.clone(), f);
427        }
428    }
429
430    pub fn on_frame_navigated_within_document(&mut self, event: &EventNavigatedWithinDocument) {
431        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
432            frame.navigated_within_url(event.url.clone());
433        }
434        if let Some((watcher, _)) = self.navigation.as_mut() {
435            watcher.on_frame_navigated_within_document(event);
436        }
437    }
438
439    pub fn on_frame_stopped_loading(&mut self, event: &EventFrameStoppedLoading) {
440        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
441            frame.on_loading_stopped();
442        }
443    }
444
445    /// Fired when frame has started loading.
446    pub fn on_frame_started_loading(&mut self, event: &EventFrameStartedLoading) {
447        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
448            frame.on_loading_started();
449        }
450    }
451
452    /// Notification is issued every time when binding is called
453    pub fn on_runtime_binding_called(&mut self, _ev: &EventBindingCalled) {}
454
455    /// Issued when new execution context is created
456    pub fn on_frame_execution_context_created(&mut self, event: &EventExecutionContextCreated) {
457        if let Some(frame_id) = event
458            .context
459            .aux_data
460            .as_ref()
461            .and_then(|v| v["frameId"].as_str())
462        {
463            if let Some(frame) = self.frames.get_mut(frame_id) {
464                if event
465                    .context
466                    .aux_data
467                    .as_ref()
468                    .and_then(|v| v["isDefault"].as_bool())
469                    .unwrap_or_default()
470                {
471                    frame
472                        .main_world
473                        .set_context(event.context.id, event.context.unique_id.clone());
474                } else if event.context.name == UTILITY_WORLD_NAME
475                    && frame.secondary_world.execution_context().is_none()
476                {
477                    frame
478                        .secondary_world
479                        .set_context(event.context.id, event.context.unique_id.clone());
480                }
481                self.context_ids
482                    .insert(event.context.unique_id.clone(), frame.id.clone());
483            }
484        }
485        if event
486            .context
487            .aux_data
488            .as_ref()
489            .filter(|v| v["type"].as_str() == Some("isolated"))
490            .is_some()
491        {
492            self.isolated_worlds.insert(event.context.name.clone());
493        }
494    }
495
496    /// Issued when execution context is destroyed
497    pub fn on_frame_execution_context_destroyed(&mut self, event: &EventExecutionContextDestroyed) {
498        if let Some(id) = self.context_ids.remove(&event.execution_context_unique_id) {
499            if let Some(frame) = self.frames.get_mut(&id) {
500                frame.destroy_context(&event.execution_context_unique_id);
501            }
502        }
503    }
504
505    /// Issued when all executionContexts were cleared
506    pub fn on_execution_contexts_cleared(&mut self) {
507        for id in self.context_ids.values() {
508            if let Some(frame) = self.frames.get_mut(id) {
509                frame.clear_contexts();
510            }
511        }
512        self.context_ids.clear()
513    }
514
515    /// Fired for top level page lifecycle events (nav, load, paint, etc.)
516    pub fn on_page_lifecycle_event(&mut self, event: &EventLifecycleEvent) {
517        if let Some(frame) = self.frames.get_mut(&event.frame_id) {
518            if event.name == "init" {
519                frame.loader_id = Some(event.loader_id.clone());
520                frame.lifecycle_events.clear();
521            }
522            frame.lifecycle_events.insert(event.name.clone().into());
523        }
524    }
525
526    /// Detach all child frames
527    fn remove_frames_recursively(&mut self, id: &FrameId) -> Option<Frame> {
528        if let Some(mut frame) = self.frames.remove(id) {
529            for child in &frame.child_frames {
530                self.remove_frames_recursively(child);
531            }
532            if let Some(parent_id) = frame.parent_frame.take() {
533                if let Some(parent) = self.frames.get_mut(&parent_id) {
534                    parent.child_frames.remove(&frame.id);
535                }
536            }
537            Some(frame)
538        } else {
539            None
540        }
541    }
542
543    pub fn ensure_isolated_world(&mut self, world_name: &str) -> Option<CommandChain> {
544        if self.isolated_worlds.contains(world_name) {
545            return None;
546        }
547
548        self.isolated_worlds.insert(world_name.to_string());
549
550        let cmd = AddScriptToEvaluateOnNewDocumentParams::builder()
551            .source(format!("//# sourceURL={EVALUATION_SCRIPT_URL}"))
552            .world_name(world_name)
553            .build()
554            .unwrap();
555
556        let mut cmds = Vec::with_capacity(self.frames.len() + 1);
557
558        cmds.push((cmd.identifier(), serde_json::to_value(cmd).unwrap()));
559
560        let cm = self.frames.keys().filter_map(|id| {
561            if let Ok(cmd) = CreateIsolatedWorldParams::builder()
562                .frame_id(id.clone())
563                .grant_univeral_access(true)
564                .world_name(world_name)
565                .build()
566            {
567                let cm = (
568                    cmd.identifier(),
569                    serde_json::to_value(cmd).unwrap_or_default(),
570                );
571
572                Some(cm)
573            } else {
574                None
575            }
576        });
577
578        cmds.extend(cm);
579
580        Some(CommandChain::new(cmds, self.request_timeout))
581    }
582}
583
584#[derive(Debug)]
585pub enum FrameEvent {
586    /// A previously submitted navigation has finished
587    NavigationResult(Result<NavigationOk, NavigationError>),
588    /// A new navigation request needs to be submitted
589    NavigationRequest(NavigationId, Request),
590    /* /// The initial page of the target has been loaded
591     * InitialPageLoadFinished */
592}
593
594#[derive(Debug)]
595pub enum NavigationError {
596    Timeout {
597        id: NavigationId,
598        err: DeadlineExceeded,
599    },
600    FrameNotFound {
601        id: NavigationId,
602        frame: FrameId,
603    },
604}
605
606impl NavigationError {
607    pub fn navigation_id(&self) -> &NavigationId {
608        match self {
609            NavigationError::Timeout { id, .. } => id,
610            NavigationError::FrameNotFound { id, .. } => id,
611        }
612    }
613}
614
615#[derive(Debug, Clone, Eq, PartialEq)]
616pub enum NavigationOk {
617    SameDocumentNavigation(NavigationId),
618    NewDocumentNavigation(NavigationId),
619}
620
621impl NavigationOk {
622    pub fn navigation_id(&self) -> &NavigationId {
623        match self {
624            NavigationOk::SameDocumentNavigation(id) => id,
625            NavigationOk::NewDocumentNavigation(id) => id,
626        }
627    }
628}
629
630/// Tracks the progress of an issued `Page.navigate` request until completion.
631#[derive(Debug)]
632pub struct NavigationWatcher {
633    id: NavigationId,
634    expected_lifecycle: HashSet<MethodId>,
635    frame_id: FrameId,
636    loader_id: Option<LoaderId>,
637    /// Once we receive the response to the issued `Page.navigate` request we
638    /// can detect whether we were navigating withing the same document or were
639    /// navigating to a new document by checking if a loader was included in the
640    /// response.
641    same_document_navigation: bool,
642}
643
644impl NavigationWatcher {
645    pub fn until_page_load(id: NavigationId, frame: FrameId, loader_id: Option<LoaderId>) -> Self {
646        Self {
647            id,
648            expected_lifecycle: std::iter::once("load".into()).collect(),
649            loader_id,
650            frame_id: frame,
651            same_document_navigation: false,
652        }
653    }
654
655    /// Checks whether the navigation was completed
656    pub fn is_lifecycle_complete(&self) -> bool {
657        self.expected_lifecycle.is_empty()
658    }
659
660    fn on_frame_navigated_within_document(&mut self, ev: &EventNavigatedWithinDocument) {
661        if self.frame_id == ev.frame_id {
662            self.same_document_navigation = true;
663        }
664    }
665}
666
/// Identifies a navigation; wraps a plain `usize`.
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
pub struct NavigationId(pub usize);
670
671/// Represents a the request for a navigation
672#[derive(Debug)]
673pub struct FrameRequestedNavigation {
674    /// The internal identifier
675    pub id: NavigationId,
676    /// the cdp request that will trigger the navigation
677    pub req: Request,
678    /// The timeout after which the request will be considered timed out
679    pub timeout: Duration,
680}
681
682impl FrameRequestedNavigation {
683    pub fn new(id: NavigationId, req: Request) -> Self {
684        Self {
685            id,
686            req,
687            timeout: Duration::from_millis(REQUEST_TIMEOUT),
688        }
689    }
690
691    /// This will set the id of the frame into the `params` `frameId` field.
692    pub fn set_frame_id(&mut self, frame_id: FrameId) {
693        if let Some(params) = self.req.params.as_object_mut() {
694            if let Entry::Vacant(entry) = params.entry("frameId") {
695                entry.insert(serde_json::Value::String(frame_id.into()));
696            }
697        }
698    }
699}
700
/// Lifecycle events of a page, `Load` being the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LifecycleEvent {
    /// Rendered as `load`
    #[default]
    Load,
    /// Rendered as `DOMContentLoaded`
    DomcontentLoaded,
    /// Rendered as `networkIdle`
    NetworkIdle,
    /// Rendered as `networkAlmostIdle`
    NetworkAlmostIdle,
}

/// Maps every variant to the name of the lifecycle event it stands for.
impl AsRef<str> for LifecycleEvent {
    fn as_ref(&self) -> &str {
        match *self {
            Self::Load => "load",
            Self::DomcontentLoaded => "DOMContentLoaded",
            Self::NetworkIdle => "networkIdle",
            Self::NetworkAlmostIdle => "networkAlmostIdle",
        }
    }
}