viewpoint_core/page/aria_snapshot/mod.rs
1//! Page-level ARIA accessibility snapshot methods.
2//!
3//! This module provides methods for capturing accessibility snapshots that span
4//! multiple frames, stitching together the accessibility trees from each frame
5//! into a complete representation of the page.
6//!
7//! # Performance
8//!
9//! Snapshot capture is optimized for performance through:
10//! - **Parallel node resolution**: Multiple `DOM.describeNode` CDP calls are executed
11//! concurrently (up to 50 by default) instead of sequentially
12//! - **Batch array access**: Element object IDs are retrieved in a single CDP call
13//! using `Runtime.getProperties` instead of N individual calls
14//! - **Parallel frame capture**: Multi-frame snapshots capture all child frames
15//! concurrently instead of sequentially
16//!
17//! These optimizations can provide 10-20x performance improvement for large DOMs.
18//!
19//! # Configuration
20//!
21//! Use [`SnapshotOptions`] to tune snapshot behavior:
22//!
23//! ```no_run
24//! use viewpoint_core::{Page, SnapshotOptions};
25//!
26//! # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
27//! // Default options (include refs, 50 concurrent CDP calls)
28//! let snapshot = page.aria_snapshot().await?;
29//!
30//! // Skip ref resolution for maximum performance
31//! let options = SnapshotOptions::default().include_refs(false);
32//! let snapshot = page.aria_snapshot_with_options(options).await?;
33//!
34//! // Increase concurrency for fast networks
35//! let options = SnapshotOptions::default().max_concurrency(100);
36//! let snapshot = page.aria_snapshot_with_options(options).await?;
37//! # Ok(())
38//! # }
39//! ```
40//!
41//! # Frame Boundary Handling
42//!
43//! When capturing aria snapshots, iframes are marked as frame boundaries with
44//! `is_frame: true`. The `aria_snapshot_with_frames()` method captures snapshots
45//! from all frames and stitches them together at the iframe boundaries.
46//!
47//! # Cross-Origin Limitations
48//!
49//! Due to browser security restrictions:
50//! - Same-origin iframes: Content is fully captured and stitched
51//! - Cross-origin iframes: Marked as boundaries with `is_frame: true` but content
52//! may be limited or empty depending on CDP permissions
53//!
54//! # Example
55//!
56//! ```no_run
57//! use viewpoint_core::Page;
58//!
59//! # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
60//! // Capture complete accessibility tree including iframes
61//! let snapshot = page.aria_snapshot_with_frames().await?;
62//! println!("{}", snapshot);
63//!
64//! // The snapshot will include all frame content stitched together
65//! // Iframes are represented with their content inline
66//! # Ok(())
67//! # }
68//! ```
69
70mod cdp_helpers;
71mod frame_stitching;
72mod options;
73mod ref_resolution;
74
75use std::collections::HashMap;
76
77use futures::stream::{FuturesUnordered, StreamExt};
78use tracing::{debug, instrument};
79use viewpoint_js::js;
80
81use self::frame_stitching::stitch_frame_content;
82pub use self::options::SnapshotOptions;
83pub(crate) use self::ref_resolution::apply_refs_to_snapshot;
84use super::Page;
85use super::locator::AriaSnapshot;
86use super::locator::aria_js::aria_snapshot_with_refs_js;
87use crate::error::PageError;
88
89impl Page {
90 /// Capture an ARIA accessibility snapshot of the entire page including all frames.
91 ///
92 /// This method captures the accessibility tree of the main frame and all child
93 /// frames (iframes), then stitches them together into a single tree. Frame
94 /// boundaries in the main frame snapshot are replaced with the actual content
95 /// from the corresponding frames.
96 ///
97 /// # Performance
98 ///
99 /// Child frame snapshots are captured in parallel for improved performance.
100 /// For pages with many iframes, this can significantly reduce capture time.
101 ///
102 /// # Frame Content Stitching
103 ///
104 /// The method works by:
105 /// 1. Capturing the main frame's aria snapshot (which marks iframes as boundaries)
106 /// 2. Getting the frame tree from CDP
107 /// 3. For each child frame, capturing its aria snapshot (in parallel)
108 /// 4. Stitching child frame content into the parent snapshot at iframe boundaries
109 ///
110 /// # Cross-Origin Frames
111 ///
112 /// For cross-origin frames, CDP may still be able to capture content through
113 /// out-of-process iframe (OOPIF) handling. However, some content may be
114 /// inaccessible due to browser security policies. In such cases, the frame
115 /// boundary will remain with `is_frame: true` but may have limited or no children.
116 ///
117 /// # Example
118 ///
119 /// ```no_run
120 /// use viewpoint_core::Page;
121 ///
122 /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
123 /// let snapshot = page.aria_snapshot_with_frames().await?;
124 ///
125 /// // The snapshot YAML output will show frame content inline:
126 /// // - document "Main Page"
127 /// // - heading "Title"
128 /// // - iframe "Widget Frame" [frame-boundary]
129 /// // - document "Widget"
130 /// // - button "Click me"
131 /// println!("{}", snapshot);
132 /// # Ok(())
133 /// # }
134 /// ```
135 ///
136 /// # Errors
137 ///
138 /// Returns an error if:
139 /// - The page is closed
140 /// - Frame tree retrieval fails
141 /// - Snapshot capture fails for the main frame
142 #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
143 pub async fn aria_snapshot_with_frames(&self) -> Result<AriaSnapshot, PageError> {
144 self.aria_snapshot_with_frames_and_options(SnapshotOptions::default())
145 .await
146 }
147
148 /// Capture an ARIA accessibility snapshot of the entire page including all frames,
149 /// with custom options.
150 ///
151 /// See [`aria_snapshot_with_frames`](Self::aria_snapshot_with_frames) for details.
152 ///
153 /// # Example
154 ///
155 /// ```no_run
156 /// use viewpoint_core::{Page, SnapshotOptions};
157 ///
158 /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
159 /// // Skip ref resolution for faster capture
160 /// let options = SnapshotOptions::default().include_refs(false);
161 /// let snapshot = page.aria_snapshot_with_frames_and_options(options).await?;
162 /// # Ok(())
163 /// # }
164 /// ```
165 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
166 pub async fn aria_snapshot_with_frames_and_options(
167 &self,
168 options: SnapshotOptions,
169 ) -> Result<AriaSnapshot, PageError> {
170 if self.closed {
171 return Err(PageError::Closed);
172 }
173
174 // Get the main frame snapshot first
175 let main_frame = self.main_frame().await?;
176 let mut root_snapshot = main_frame
177 .aria_snapshot_with_options(options.clone())
178 .await?;
179
180 // Get all frames
181 let frames = self.frames().await?;
182
183 // Filter to non-main frames
184 let child_frames: Vec<_> = frames.iter().filter(|f| !f.is_main()).collect();
185
186 if child_frames.is_empty() {
187 return Ok(root_snapshot);
188 }
189
190 debug!(
191 frame_count = child_frames.len(),
192 "Capturing child frame snapshots in parallel"
193 );
194
195 // Capture all child frame snapshots in parallel
196 let frame_futures: FuturesUnordered<_> = child_frames
197 .iter()
198 .map(|frame| {
199 let frame_id = frame.id().to_string();
200 let frame_url = frame.url().clone();
201 let frame_name = frame.name().clone();
202 let opts = options.clone();
203 async move {
204 match frame.aria_snapshot_with_options(opts).await {
205 Ok(snapshot) => Some((frame_id, frame_url, frame_name, snapshot)),
206 Err(e) => {
207 tracing::warn!(
208 error = %e,
209 frame_id = %frame_id,
210 frame_url = %frame_url,
211 "Failed to capture frame snapshot, skipping"
212 );
213 None
214 }
215 }
216 }
217 })
218 .collect();
219
220 // Collect results
221 let results: Vec<_> = frame_futures.collect().await;
222
223 // Build a map of frame URL/name to captured snapshots
224 let mut frame_snapshots: HashMap<String, AriaSnapshot> = HashMap::new();
225
226 for result in results.into_iter().flatten() {
227 let (frame_id, frame_url, frame_name, snapshot) = result;
228
229 if !frame_url.is_empty() && frame_url != "about:blank" {
230 frame_snapshots.insert(frame_url, snapshot.clone());
231 }
232 if !frame_name.is_empty() {
233 frame_snapshots.insert(frame_name, snapshot.clone());
234 }
235 // Also store by frame ID
236 frame_snapshots.insert(frame_id, snapshot);
237 }
238
239 // Stitch frame content into the snapshot
240 stitch_frame_content(&mut root_snapshot, &frame_snapshots, 0);
241
242 Ok(root_snapshot)
243 }
244
245 /// Capture an ARIA accessibility snapshot of just the main frame.
246 ///
247 /// This is a convenience method equivalent to calling `main_frame().await?.aria_snapshot().await`.
248 /// Unlike `aria_snapshot_with_frames()`, this does NOT stitch in iframe content -
249 /// iframes are left as boundaries with `is_frame: true`.
250 ///
251 /// # Node References
252 ///
253 /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
254 /// These refs can be used with `element_from_ref()` or `locator_from_ref()` to
255 /// interact with elements discovered in the snapshot.
256 ///
257 /// # Example
258 ///
259 /// ```no_run
260 /// use viewpoint_core::Page;
261 ///
262 /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
263 /// // Quick snapshot without frame content
264 /// let snapshot = page.aria_snapshot().await?;
265 ///
266 /// // Each element has a ref for interaction
267 /// if let Some(ref node_ref) = snapshot.node_ref {
268 /// let locator = page.locator_from_ref(node_ref);
269 /// locator.click().await?;
270 /// }
271 ///
272 /// // Check if there are frame boundaries to expand
273 /// if !snapshot.iframe_refs.is_empty() {
274 /// println!("Page has {} frames that can be expanded", snapshot.iframe_refs.len());
275 /// }
276 /// # Ok(())
277 /// # }
278 /// ```
279 ///
280 /// # Errors
281 ///
282 /// Returns an error if:
283 /// - The page is closed
284 /// - Snapshot capture fails
285 #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
286 pub async fn aria_snapshot(&self) -> Result<AriaSnapshot, PageError> {
287 self.aria_snapshot_with_options(SnapshotOptions::default())
288 .await
289 }
290
291 /// Capture an ARIA accessibility snapshot with custom options.
292 ///
293 /// See [`aria_snapshot`](Self::aria_snapshot) for details.
294 ///
295 /// # Example
296 ///
297 /// ```no_run
298 /// use viewpoint_core::{Page, SnapshotOptions};
299 ///
300 /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
301 /// // Skip ref resolution for maximum performance
302 /// let options = SnapshotOptions::default().include_refs(false);
303 /// let snapshot = page.aria_snapshot_with_options(options).await?;
304 ///
305 /// // Increase concurrency for fast networks
306 /// let options = SnapshotOptions::default().max_concurrency(100);
307 /// let snapshot = page.aria_snapshot_with_options(options).await?;
308 /// # Ok(())
309 /// # }
310 /// ```
311 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
312 pub async fn aria_snapshot_with_options(
313 &self,
314 options: SnapshotOptions,
315 ) -> Result<AriaSnapshot, PageError> {
316 if self.closed {
317 return Err(PageError::Closed);
318 }
319
320 // Capture snapshot with element collection for ref resolution
321 self.capture_snapshot_with_refs(options).await
322 }
323
324 /// Internal method to capture a snapshot with refs resolved.
325 ///
326 /// This uses a two-phase approach:
327 /// 1. JS traversal collects the snapshot and element references
328 /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
329 ///
330 /// # Performance Optimizations
331 ///
332 /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
333 /// - Uses `FuturesUnordered` to resolve node IDs in parallel
334 /// - Configurable concurrency limit to avoid overwhelming the browser
335 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
336 async fn capture_snapshot_with_refs(
337 &self,
338 options: SnapshotOptions,
339 ) -> Result<AriaSnapshot, PageError> {
340 let snapshot_fn = aria_snapshot_with_refs_js();
341
342 // Evaluate the JS function to get snapshot and element array
343 // We return by value for the snapshot, but need remote objects for elements
344 let js_code = js! {
345 (function() {
346 const getSnapshotWithRefs = @{snapshot_fn};
347 return getSnapshotWithRefs(document.body);
348 })()
349 };
350
351 // First, evaluate to get the result as a RemoteObject (not by value)
352 // so we can access the elements array
353 let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
354 .connection()
355 .send_command(
356 "Runtime.evaluate",
357 Some(viewpoint_cdp::protocol::runtime::EvaluateParams {
358 expression: js_code,
359 object_group: Some("viewpoint-snapshot".to_string()),
360 include_command_line_api: None,
361 silent: Some(true),
362 context_id: None,
363 return_by_value: Some(false), // Get RemoteObject, not value
364 await_promise: Some(false),
365 }),
366 Some(self.session_id()),
367 )
368 .await?;
369
370 if let Some(exception) = result.exception_details {
371 return Err(PageError::EvaluationFailed(exception.text));
372 }
373
374 let result_object_id = result.result.object_id.ok_or_else(|| {
375 PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
376 })?;
377
378 // Get the snapshot property (by value)
379 let snapshot_value = self
380 .get_property_value(&result_object_id, "snapshot")
381 .await?;
382
383 // Parse the snapshot
384 let mut snapshot: AriaSnapshot = serde_json::from_value(snapshot_value).map_err(|e| {
385 PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
386 })?;
387
388 // Only resolve refs if requested
389 if options.include_refs {
390 // Get the elements array as a RemoteObject
391 let elements_result = self
392 .get_property_object(&result_object_id, "elements")
393 .await?;
394
395 if let Some(elements_object_id) = elements_result {
396 // Batch-fetch all array element object IDs using Runtime.getProperties
397 let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
398 let element_count = element_object_ids.len();
399
400 debug!(
401 element_count = element_count,
402 max_concurrency = options.max_concurrency,
403 "Resolving element refs in parallel"
404 );
405
406 // Resolve all node IDs in parallel with concurrency limit
407 let ref_map = self
408 .resolve_node_ids_parallel(element_object_ids, options.max_concurrency)
409 .await;
410
411 debug!(
412 resolved_count = ref_map.len(),
413 total_count = element_count,
414 "Completed parallel ref resolution"
415 );
416
417 // Apply refs to the snapshot tree
418 apply_refs_to_snapshot(&mut snapshot, &ref_map);
419
420 // Release the elements array to free memory
421 let _ = self.release_object(&elements_object_id).await;
422 }
423 }
424
425 // Release the result object
426 let _ = self.release_object(&result_object_id).await;
427
428 Ok(snapshot)
429 }
430}