viewpoint_core/page/aria_snapshot/
mod.rs

1//! Page-level ARIA accessibility snapshot methods.
2//!
3//! This module provides methods for capturing accessibility snapshots that span
4//! multiple frames, stitching together the accessibility trees from each frame
5//! into a complete representation of the page.
6//!
7//! # Performance
8//!
9//! Snapshot capture is optimized for performance through:
10//! - **Parallel node resolution**: Multiple `DOM.describeNode` CDP calls are executed
11//!   concurrently (up to 50 by default) instead of sequentially
12//! - **Batch array access**: Element object IDs are retrieved in a single CDP call
13//!   using `Runtime.getProperties` instead of N individual calls
14//! - **Parallel frame capture**: Multi-frame snapshots capture all child frames
15//!   concurrently instead of sequentially
16//!
17//! These optimizations can provide 10-20x performance improvement for large DOMs.
18//!
19//! # Configuration
20//!
21//! Use [`SnapshotOptions`] to tune snapshot behavior:
22//!
23//! ```no_run
24//! use viewpoint_core::{Page, SnapshotOptions};
25//!
26//! # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
27//! // Default options (include refs, 50 concurrent CDP calls)
28//! let snapshot = page.aria_snapshot().await?;
29//!
30//! // Skip ref resolution for maximum performance
31//! let options = SnapshotOptions::default().include_refs(false);
32//! let snapshot = page.aria_snapshot_with_options(options).await?;
33//!
34//! // Increase concurrency for fast networks
35//! let options = SnapshotOptions::default().max_concurrency(100);
36//! let snapshot = page.aria_snapshot_with_options(options).await?;
37//! # Ok(())
38//! # }
39//! ```
40//!
41//! # Frame Boundary Handling
42//!
43//! When capturing aria snapshots, iframes are marked as frame boundaries with
44//! `is_frame: true`. The `aria_snapshot_with_frames()` method captures snapshots
45//! from all frames and stitches them together at the iframe boundaries.
46//!
47//! # Cross-Origin Limitations
48//!
49//! Due to browser security restrictions:
50//! - Same-origin iframes: Content is fully captured and stitched
51//! - Cross-origin iframes: Marked as boundaries with `is_frame: true` but content
52//!   may be limited or empty depending on CDP permissions
53//!
54//! # Example
55//!
56//! ```no_run
57//! use viewpoint_core::Page;
58//!
59//! # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
60//! // Capture complete accessibility tree including iframes
61//! let snapshot = page.aria_snapshot_with_frames().await?;
62//! println!("{}", snapshot);
63//!
64//! // The snapshot will include all frame content stitched together
65//! // Iframes are represented with their content inline
66//! # Ok(())
67//! # }
68//! ```
69
70mod cdp_helpers;
71mod frame_stitching;
72mod options;
73mod ref_resolution;
74
75use std::collections::HashMap;
76
77use futures::stream::{FuturesUnordered, StreamExt};
78use tracing::{debug, instrument};
79use viewpoint_js::js;
80
81use self::frame_stitching::stitch_frame_content;
82pub use self::options::SnapshotOptions;
83pub(crate) use self::ref_resolution::apply_refs_to_snapshot;
84use super::Page;
85use super::locator::AriaSnapshot;
86use super::locator::aria_js::aria_snapshot_with_refs_js;
87use crate::error::PageError;
88
89impl Page {
90    /// Capture an ARIA accessibility snapshot of the entire page including all frames.
91    ///
92    /// This method captures the accessibility tree of the main frame and all child
93    /// frames (iframes), then stitches them together into a single tree. Frame
94    /// boundaries in the main frame snapshot are replaced with the actual content
95    /// from the corresponding frames.
96    ///
97    /// # Performance
98    ///
99    /// Child frame snapshots are captured in parallel for improved performance.
100    /// For pages with many iframes, this can significantly reduce capture time.
101    ///
102    /// # Frame Content Stitching
103    ///
104    /// The method works by:
105    /// 1. Capturing the main frame's aria snapshot (which marks iframes as boundaries)
106    /// 2. Getting the frame tree from CDP
107    /// 3. For each child frame, capturing its aria snapshot (in parallel)
108    /// 4. Stitching child frame content into the parent snapshot at iframe boundaries
109    ///
110    /// # Cross-Origin Frames
111    ///
112    /// For cross-origin frames, CDP may still be able to capture content through
113    /// out-of-process iframe (OOPIF) handling. However, some content may be
114    /// inaccessible due to browser security policies. In such cases, the frame
115    /// boundary will remain with `is_frame: true` but may have limited or no children.
116    ///
117    /// # Example
118    ///
119    /// ```no_run
120    /// use viewpoint_core::Page;
121    ///
122    /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
123    /// let snapshot = page.aria_snapshot_with_frames().await?;
124    ///
125    /// // The snapshot YAML output will show frame content inline:
126    /// // - document "Main Page"
127    /// //   - heading "Title"
128    /// //   - iframe "Widget Frame" [frame-boundary]
129    /// //     - document "Widget"
130    /// //       - button "Click me"
131    /// println!("{}", snapshot);
132    /// # Ok(())
133    /// # }
134    /// ```
135    ///
136    /// # Errors
137    ///
138    /// Returns an error if:
139    /// - The page is closed
140    /// - Frame tree retrieval fails
141    /// - Snapshot capture fails for the main frame
142    #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
143    pub async fn aria_snapshot_with_frames(&self) -> Result<AriaSnapshot, PageError> {
144        self.aria_snapshot_with_frames_and_options(SnapshotOptions::default())
145            .await
146    }
147
148    /// Capture an ARIA accessibility snapshot of the entire page including all frames,
149    /// with custom options.
150    ///
151    /// See [`aria_snapshot_with_frames`](Self::aria_snapshot_with_frames) for details.
152    ///
153    /// # Example
154    ///
155    /// ```no_run
156    /// use viewpoint_core::{Page, SnapshotOptions};
157    ///
158    /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
159    /// // Skip ref resolution for faster capture
160    /// let options = SnapshotOptions::default().include_refs(false);
161    /// let snapshot = page.aria_snapshot_with_frames_and_options(options).await?;
162    /// # Ok(())
163    /// # }
164    /// ```
165    #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
166    pub async fn aria_snapshot_with_frames_and_options(
167        &self,
168        options: SnapshotOptions,
169    ) -> Result<AriaSnapshot, PageError> {
170        if self.closed {
171            return Err(PageError::Closed);
172        }
173
174        // Get the main frame snapshot first using Page's method to populate ref_map
175        let mut root_snapshot = self.capture_snapshot_with_refs(options.clone()).await?;
176
177        // Get all frames
178        let frames = self.frames().await?;
179
180        // Filter to non-main frames
181        let child_frames: Vec<_> = frames.iter().filter(|f| !f.is_main()).collect();
182
183        if child_frames.is_empty() {
184            return Ok(root_snapshot);
185        }
186
187        debug!(
188            frame_count = child_frames.len(),
189            "Capturing child frame snapshots in parallel"
190        );
191
192        // Capture all child frame snapshots in parallel
193        let frame_futures: FuturesUnordered<_> = child_frames
194            .iter()
195            .map(|frame| {
196                let frame_id = frame.id().to_string();
197                let frame_url = frame.url().clone();
198                let frame_name = frame.name().clone();
199                let opts = options.clone();
200                async move {
201                    match frame.aria_snapshot_with_options(opts).await {
202                        Ok(snapshot) => Some((frame_id, frame_url, frame_name, snapshot)),
203                        Err(e) => {
204                            tracing::warn!(
205                                error = %e,
206                                frame_id = %frame_id,
207                                frame_url = %frame_url,
208                                "Failed to capture frame snapshot, skipping"
209                            );
210                            None
211                        }
212                    }
213                }
214            })
215            .collect();
216
217        // Collect results
218        let results: Vec<_> = frame_futures.collect().await;
219
220        // Build a map of frame URL/name to captured snapshots
221        let mut frame_snapshots: HashMap<String, AriaSnapshot> = HashMap::new();
222
223        for result in results.into_iter().flatten() {
224            let (frame_id, frame_url, frame_name, snapshot) = result;
225
226            if !frame_url.is_empty() && frame_url != "about:blank" {
227                frame_snapshots.insert(frame_url, snapshot.clone());
228            }
229            if !frame_name.is_empty() {
230                frame_snapshots.insert(frame_name, snapshot.clone());
231            }
232            // Also store by frame ID
233            frame_snapshots.insert(frame_id, snapshot);
234        }
235
236        // Stitch frame content into the snapshot
237        stitch_frame_content(&mut root_snapshot, &frame_snapshots, 0);
238
239        Ok(root_snapshot)
240    }
241
242    /// Capture an ARIA accessibility snapshot of just the main frame.
243    ///
244    /// This is a convenience method equivalent to calling `main_frame().await?.aria_snapshot().await`.
245    /// Unlike `aria_snapshot_with_frames()`, this does NOT stitch in iframe content -
246    /// iframes are left as boundaries with `is_frame: true`.
247    ///
248    /// # Node References
249    ///
250    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
251    /// These refs can be used with `element_from_ref()` or `locator_from_ref()` to
252    /// interact with elements discovered in the snapshot.
253    ///
254    /// # Example
255    ///
256    /// ```no_run
257    /// use viewpoint_core::Page;
258    ///
259    /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
260    /// // Quick snapshot without frame content
261    /// let snapshot = page.aria_snapshot().await?;
262    ///
263    /// // Each element has a ref for interaction
264    /// if let Some(ref node_ref) = snapshot.node_ref {
265    ///     let locator = page.locator_from_ref(node_ref);
266    ///     locator.click().await?;
267    /// }
268    ///
269    /// // Check if there are frame boundaries to expand
270    /// if !snapshot.iframe_refs.is_empty() {
271    ///     println!("Page has {} frames that can be expanded", snapshot.iframe_refs.len());
272    /// }
273    /// # Ok(())
274    /// # }
275    /// ```
276    ///
277    /// # Errors
278    ///
279    /// Returns an error if:
280    /// - The page is closed
281    /// - Snapshot capture fails
282    #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
283    pub async fn aria_snapshot(&self) -> Result<AriaSnapshot, PageError> {
284        self.aria_snapshot_with_options(SnapshotOptions::default())
285            .await
286    }
287
288    /// Capture an ARIA accessibility snapshot with custom options.
289    ///
290    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
291    ///
292    /// # Example
293    ///
294    /// ```no_run
295    /// use viewpoint_core::{Page, SnapshotOptions};
296    ///
297    /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
298    /// // Skip ref resolution for maximum performance
299    /// let options = SnapshotOptions::default().include_refs(false);
300    /// let snapshot = page.aria_snapshot_with_options(options).await?;
301    ///
302    /// // Increase concurrency for fast networks
303    /// let options = SnapshotOptions::default().max_concurrency(100);
304    /// let snapshot = page.aria_snapshot_with_options(options).await?;
305    /// # Ok(())
306    /// # }
307    /// ```
308    #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
309    pub async fn aria_snapshot_with_options(
310        &self,
311        options: SnapshotOptions,
312    ) -> Result<AriaSnapshot, PageError> {
313        if self.closed {
314            return Err(PageError::Closed);
315        }
316
317        // Capture snapshot with element collection for ref resolution
318        self.capture_snapshot_with_refs(options).await
319    }
320
321    /// Internal method to capture a snapshot with refs resolved.
322    ///
323    /// This uses a two-phase approach:
324    /// 1. JS traversal collects the snapshot and element references
325    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
326    ///
327    /// # Performance Optimizations
328    ///
329    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
330    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
331    /// - Configurable concurrency limit to avoid overwhelming the browser
332    #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
333    async fn capture_snapshot_with_refs(
334        &self,
335        options: SnapshotOptions,
336    ) -> Result<AriaSnapshot, PageError> {
337        let snapshot_fn = aria_snapshot_with_refs_js();
338
339        // Evaluate the JS function to get snapshot and element array
340        // We return by value for the snapshot, but need remote objects for elements
341        let js_code = js! {
342            (function() {
343                const getSnapshotWithRefs = @{snapshot_fn};
344                return getSnapshotWithRefs(document.body);
345            })()
346        };
347
348        // First, evaluate to get the result as a RemoteObject (not by value)
349        // so we can access the elements array
350        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
351            .connection()
352            .send_command(
353                "Runtime.evaluate",
354                Some(viewpoint_cdp::protocol::runtime::EvaluateParams {
355                    expression: js_code,
356                    object_group: Some("viewpoint-snapshot".to_string()),
357                    include_command_line_api: None,
358                    silent: Some(true),
359                    context_id: None,
360                    return_by_value: Some(false), // Get RemoteObject, not value
361                    await_promise: Some(false),
362                }),
363                Some(self.session_id()),
364            )
365            .await?;
366
367        if let Some(exception) = result.exception_details {
368            return Err(PageError::EvaluationFailed(exception.text));
369        }
370
371        let result_object_id = result.result.object_id.ok_or_else(|| {
372            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
373        })?;
374
375        // Get the snapshot property (by value)
376        let snapshot_value = self
377            .get_property_value(&result_object_id, "snapshot")
378            .await?;
379
380        // Parse the snapshot
381        let mut snapshot: AriaSnapshot = serde_json::from_value(snapshot_value).map_err(|e| {
382            PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
383        })?;
384
385        // Clear any previous ref mappings before populating new ones
386        self.clear_ref_map();
387
388        // Only resolve refs if requested
389        if options.include_refs {
390            // Get the elements array as a RemoteObject
391            let elements_result = self
392                .get_property_object(&result_object_id, "elements")
393                .await?;
394
395            if let Some(elements_object_id) = elements_result {
396                // Batch-fetch all array element object IDs using Runtime.getProperties
397                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
398                let element_count = element_object_ids.len();
399
400                debug!(
401                    element_count = element_count,
402                    max_concurrency = options.max_concurrency,
403                    "Resolving element refs in parallel"
404                );
405
406                // Resolve all node IDs in parallel with concurrency limit
407                let index_to_backend_id = self
408                    .resolve_node_ids_parallel(element_object_ids, options.max_concurrency)
409                    .await;
410
411                debug!(
412                    resolved_count = index_to_backend_id.len(),
413                    total_count = element_count,
414                    "Completed parallel ref resolution"
415                );
416
417                // Apply refs to the snapshot tree and get ref-to-backendId mappings
418                // Frame index 0 = main frame (Page captures always use the main frame)
419                let ref_to_backend_id = apply_refs_to_snapshot(
420                    &mut snapshot,
421                    &index_to_backend_id,
422                    self.context_index,
423                    self.page_index,
424                    0, // main frame
425                );
426
427                // Store the ref mappings for later resolution
428                for (ref_str, backend_node_id) in ref_to_backend_id {
429                    self.store_ref_mapping(ref_str, backend_node_id);
430                }
431
432                // Release the elements array to free memory
433                let _ = self.release_object(&elements_object_id).await;
434            }
435        }
436
437        // Release the result object
438        let _ = self.release_object(&result_object_id).await;
439
440        Ok(snapshot)
441    }
442}