viewpoint_core/page/aria_snapshot/
mod.rs

1//! Page-level ARIA accessibility snapshot methods.
2//!
3//! This module provides methods for capturing accessibility snapshots that span
4//! multiple frames, stitching together the accessibility trees from each frame
5//! into a complete representation of the page.
6//!
7//! # Frame Boundary Handling
8//!
9//! When capturing aria snapshots, iframes are marked as frame boundaries with
10//! `is_frame: true`. The `aria_snapshot_with_frames()` method captures snapshots
11//! from all frames and stitches them together at the iframe boundaries.
12//!
13//! # Cross-Origin Limitations
14//!
15//! Due to browser security restrictions:
16//! - Same-origin iframes: Content is fully captured and stitched
17//! - Cross-origin iframes: Marked as boundaries with `is_frame: true` but content
18//!   may be limited or empty depending on CDP permissions
19//!
20//! # Example
21//!
22//! ```no_run
23//! use viewpoint_core::Page;
24//!
25//! # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
26//! // Capture complete accessibility tree including iframes
27//! let snapshot = page.aria_snapshot_with_frames().await?;
28//! println!("{}", snapshot);
29//!
30//! // The snapshot will include all frame content stitched together
31//! // Iframes are represented with their content inline
32//! # Ok(())
33//! # }
34//! ```
35
36use std::collections::HashMap;
37
38use tracing::{debug, instrument, warn};
39
40use super::locator::AriaSnapshot;
41use super::Page;
42use crate::error::PageError;
43
44impl Page {
45    /// Capture an ARIA accessibility snapshot of the entire page including all frames.
46    ///
47    /// This method captures the accessibility tree of the main frame and all child
48    /// frames (iframes), then stitches them together into a single tree. Frame
49    /// boundaries in the main frame snapshot are replaced with the actual content
50    /// from the corresponding frames.
51    ///
52    /// # Frame Content Stitching
53    ///
54    /// The method works by:
55    /// 1. Capturing the main frame's aria snapshot (which marks iframes as boundaries)
56    /// 2. Getting the frame tree from CDP
57    /// 3. For each child frame, capturing its aria snapshot
58    /// 4. Stitching child frame content into the parent snapshot at iframe boundaries
59    ///
60    /// # Cross-Origin Frames
61    ///
62    /// For cross-origin frames, CDP may still be able to capture content through
63    /// out-of-process iframe (OOPIF) handling. However, some content may be
64    /// inaccessible due to browser security policies. In such cases, the frame
65    /// boundary will remain with `is_frame: true` but may have limited or no children.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use viewpoint_core::Page;
71    ///
72    /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
73    /// let snapshot = page.aria_snapshot_with_frames().await?;
74    ///
75    /// // The snapshot YAML output will show frame content inline:
76    /// // - document "Main Page"
77    /// //   - heading "Title"
78    /// //   - iframe "Widget Frame" [frame-boundary]
79    /// //     - document "Widget"
80    /// //       - button "Click me"
81    /// println!("{}", snapshot);
82    /// # Ok(())
83    /// # }
84    /// ```
85    ///
86    /// # Errors
87    ///
88    /// Returns an error if:
89    /// - The page is closed
90    /// - Frame tree retrieval fails
91    /// - Snapshot capture fails for the main frame
92    #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
93    pub async fn aria_snapshot_with_frames(&self) -> Result<AriaSnapshot, PageError> {
94        if self.closed {
95            return Err(PageError::Closed);
96        }
97
98        // Get the main frame snapshot first
99        let main_frame = self.main_frame().await?;
100        let mut root_snapshot = main_frame.aria_snapshot().await?;
101
102        // Get all frames
103        let frames = self.frames().await?;
104
105        // Build a map of frame URL/name to captured snapshots
106        let mut frame_snapshots: HashMap<String, AriaSnapshot> = HashMap::new();
107
108        for frame in &frames {
109            if !frame.is_main() {
110                // Capture snapshot for this frame
111                match frame.aria_snapshot().await {
112                    Ok(snapshot) => {
113                        let url = frame.url();
114                        if !url.is_empty() && url != "about:blank" {
115                            frame_snapshots.insert(url.clone(), snapshot.clone());
116                        }
117                        let name = frame.name();
118                        if !name.is_empty() {
119                            frame_snapshots.insert(name.clone(), snapshot.clone());
120                        }
121                        // Also store by frame ID
122                        frame_snapshots.insert(frame.id().to_string(), snapshot);
123                    }
124                    Err(e) => {
125                        warn!(
126                            error = %e,
127                            frame_id = %frame.id(),
128                            frame_url = %frame.url(),
129                            "Failed to capture frame snapshot, skipping"
130                        );
131                    }
132                }
133            }
134        }
135
136        // Stitch frame content into the snapshot
137        stitch_frame_content(&mut root_snapshot, &frame_snapshots, 0);
138
139        Ok(root_snapshot)
140    }
141
142    /// Capture an ARIA accessibility snapshot of just the main frame.
143    ///
144    /// This is a convenience method equivalent to calling `main_frame().await?.aria_snapshot().await`.
145    /// Unlike `aria_snapshot_with_frames()`, this does NOT stitch in iframe content -
146    /// iframes are left as boundaries with `is_frame: true`.
147    ///
148    /// # Example
149    ///
150    /// ```no_run
151    /// use viewpoint_core::Page;
152    ///
153    /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
154    /// // Quick snapshot without frame content
155    /// let snapshot = page.aria_snapshot().await?;
156    ///
157    /// // Check if there are frame boundaries to expand
158    /// if !snapshot.iframe_refs.is_empty() {
159    ///     println!("Page has {} frames that can be expanded", snapshot.iframe_refs.len());
160    /// }
161    /// # Ok(())
162    /// # }
163    /// ```
164    ///
165    /// # Errors
166    ///
167    /// Returns an error if:
168    /// - The page is closed
169    /// - Snapshot capture fails
170    #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
171    pub async fn aria_snapshot(&self) -> Result<AriaSnapshot, PageError> {
172        if self.closed {
173            return Err(PageError::Closed);
174        }
175
176        let main_frame = self.main_frame().await?;
177        main_frame.aria_snapshot().await
178    }
179}
180
181/// Recursively stitch frame content into aria snapshot at iframe boundaries.
182///
183/// This function traverses the snapshot tree looking for nodes with `is_frame: true`.
184/// When found, it attempts to find the corresponding frame snapshot and adds that
185/// content as children of the iframe node.
186fn stitch_frame_content(
187    snapshot: &mut AriaSnapshot,
188    frame_snapshots: &HashMap<String, AriaSnapshot>,
189    depth: usize,
190) {
191    // Prevent infinite recursion - max depth of 10 nested frames
192    const MAX_DEPTH: usize = 10;
193    if depth > MAX_DEPTH {
194        warn!(
195            depth = depth,
196            "Max frame nesting depth exceeded, stopping recursion"
197        );
198        return;
199    }
200
201    // If this is a frame boundary, try to get its content
202    if snapshot.is_frame == Some(true) {
203        // Try to find the matching frame snapshot
204        let frame_snapshot = snapshot
205            .frame_url
206            .as_ref()
207            .and_then(|url| frame_snapshots.get(url))
208            .or_else(|| {
209                snapshot
210                    .frame_name
211                    .as_ref()
212                    .and_then(|name| frame_snapshots.get(name))
213            });
214
215        if let Some(frame_content) = frame_snapshot {
216            debug!(
217                frame_url = ?snapshot.frame_url,
218                frame_name = ?snapshot.frame_name,
219                depth = depth,
220                "Stitching frame content into snapshot"
221            );
222
223            // Add the frame's content as children of this iframe node
224            // Clear is_frame to prevent re-processing this boundary
225            snapshot.is_frame = Some(false);
226            snapshot.children = vec![frame_content.clone()];
227        } else {
228            debug!(
229                frame_url = ?snapshot.frame_url,
230                frame_name = ?snapshot.frame_name,
231                "No matching frame snapshot found for iframe boundary"
232            );
233        }
234    }
235
236    // Recursively process children
237    for child in &mut snapshot.children {
238        stitch_frame_content(child, frame_snapshots, depth + 1);
239    }
240}