viewpoint_core/page/aria_snapshot/mod.rs
1//! Page-level ARIA accessibility snapshot methods.
2//!
3//! This module provides methods for capturing accessibility snapshots that span
4//! multiple frames, stitching together the accessibility trees from each frame
5//! into a complete representation of the page.
6//!
7//! # Performance
8//!
9//! Snapshot capture is optimized for performance through:
10//! - **Parallel node resolution**: Multiple `DOM.describeNode` CDP calls are executed
11//! concurrently (up to 50 by default) instead of sequentially
12//! - **Batch array access**: Element object IDs are retrieved in a single CDP call
13//! using `Runtime.getProperties` instead of N individual calls
14//! - **Parallel frame capture**: Multi-frame snapshots capture all child frames
15//! concurrently instead of sequentially
16//!
17//! These optimizations can provide 10-20x performance improvement for large DOMs.
18//!
19//! # Configuration
20//!
21//! Use [`SnapshotOptions`] to tune snapshot behavior:
22//!
23//! ```no_run
24//! use viewpoint_core::{Page, SnapshotOptions};
25//!
26//! # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
27//! // Default options (include refs, 50 concurrent CDP calls)
28//! let snapshot = page.aria_snapshot().await?;
29//!
30//! // Skip ref resolution for maximum performance
31//! let options = SnapshotOptions::default().include_refs(false);
32//! let snapshot = page.aria_snapshot_with_options(options).await?;
33//!
34//! // Increase concurrency for fast networks
35//! let options = SnapshotOptions::default().max_concurrency(100);
36//! let snapshot = page.aria_snapshot_with_options(options).await?;
37//! # Ok(())
38//! # }
39//! ```
40//!
41//! # Frame Boundary Handling
42//!
43//! When capturing aria snapshots, iframes are marked as frame boundaries with
44//! `is_frame: true`. The `aria_snapshot_with_frames()` method captures snapshots
45//! from all frames and stitches them together at the iframe boundaries.
46//!
47//! # Cross-Origin Limitations
48//!
49//! Due to browser security restrictions:
50//! - Same-origin iframes: Content is fully captured and stitched
51//! - Cross-origin iframes: Marked as boundaries with `is_frame: true` but content
52//! may be limited or empty depending on CDP permissions
53//!
54//! # Example
55//!
56//! ```no_run
57//! use viewpoint_core::Page;
58//!
59//! # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
60//! // Capture complete accessibility tree including iframes
61//! let snapshot = page.aria_snapshot_with_frames().await?;
62//! println!("{}", snapshot);
63//!
64//! // The snapshot will include all frame content stitched together
65//! // Iframes are represented with their content inline
66//! # Ok(())
67//! # }
68//! ```
69
70mod cdp_helpers;
71mod frame_stitching;
72mod options;
73mod ref_resolution;
74
75use std::collections::HashMap;
76
77use futures::stream::{FuturesUnordered, StreamExt};
78use tracing::{debug, instrument};
79use viewpoint_js::js;
80
81use self::frame_stitching::stitch_frame_content;
82pub use self::options::SnapshotOptions;
83pub(crate) use self::ref_resolution::apply_refs_to_snapshot;
84use super::Page;
85use super::locator::AriaSnapshot;
86use super::locator::aria_js::aria_snapshot_with_refs_js;
87use crate::error::PageError;
88
89impl Page {
90 /// Capture an ARIA accessibility snapshot of the entire page including all frames.
91 ///
92 /// This method captures the accessibility tree of the main frame and all child
93 /// frames (iframes), then stitches them together into a single tree. Frame
94 /// boundaries in the main frame snapshot are replaced with the actual content
95 /// from the corresponding frames.
96 ///
97 /// # Element Refs and Iframe Interaction
98 ///
99 /// Element refs from all frames (main and child iframes) are captured and stored
100 /// in the Page's ref map. This means you can use [`locator_from_ref`](Page::locator_from_ref)
101 /// or [`element_from_ref`](Page::element_from_ref) to interact with elements inside
102 /// iframes after calling this method.
103 ///
104 /// Refs from different frames have distinct frame indices in their format:
105 /// - Main frame: `c{ctx}p{page}f0e{counter}`
106 /// - First iframe: `c{ctx}p{page}f1e{counter}`
107 /// - Second iframe: `c{ctx}p{page}f2e{counter}`
108 ///
109 /// # Performance
110 ///
111 /// Child frame snapshots are captured in parallel for improved performance.
112 /// For pages with many iframes, this can significantly reduce capture time.
113 ///
114 /// # Frame Content Stitching
115 ///
116 /// The method works by:
117 /// 1. Capturing the main frame's aria snapshot (which marks iframes as boundaries)
118 /// 2. Getting the frame tree from CDP
119 /// 3. For each child frame, capturing its aria snapshot (in parallel)
120 /// 4. Stitching child frame content into the parent snapshot at iframe boundaries
121 /// 5. Storing all element ref mappings in the Page's ref_map for later resolution
122 ///
123 /// # Cross-Origin Frames
124 ///
125 /// For cross-origin frames, CDP may still be able to capture content through
126 /// out-of-process iframe (OOPIF) handling. However, some content may be
127 /// inaccessible due to browser security policies. In such cases, the frame
128 /// boundary will remain with `is_frame: true` but may have limited or no children.
129 ///
130 /// # Example
131 ///
132 /// ```no_run
133 /// use viewpoint_core::Page;
134 ///
135 /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
136 /// let snapshot = page.aria_snapshot_with_frames().await?;
137 ///
138 /// // The snapshot YAML output will show frame content inline:
139 /// // - document "Main Page"
140 /// // - heading "Title"
141 /// // - iframe "Widget Frame" [frame-boundary]
142 /// // - document "Widget"
143 /// // - button "Click me" [ref=c0p0f1e1]
144 /// println!("{}", snapshot);
145 ///
146 /// // You can interact with elements inside iframes using their refs:
147 /// // page.locator_from_ref("c0p0f1e1").click().await?;
148 /// # Ok(())
149 /// # }
150 /// ```
151 ///
152 /// # Errors
153 ///
154 /// Returns an error if:
155 /// - The page is closed
156 /// - Frame tree retrieval fails
157 /// - Snapshot capture fails for the main frame
158 #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
159 pub async fn aria_snapshot_with_frames(&self) -> Result<AriaSnapshot, PageError> {
160 self.aria_snapshot_with_frames_and_options(SnapshotOptions::default())
161 .await
162 }
163
164 /// Capture an ARIA accessibility snapshot of the entire page including all frames,
165 /// with custom options.
166 ///
167 /// See [`aria_snapshot_with_frames`](Self::aria_snapshot_with_frames) for details.
168 ///
169 /// # Example
170 ///
171 /// ```no_run
172 /// use viewpoint_core::{Page, SnapshotOptions};
173 ///
174 /// # async fn example(page: Page) -> Result<(), viewpoint_core::CoreError> {
175 /// // Skip ref resolution for faster capture
176 /// let options = SnapshotOptions::default().include_refs(false);
177 /// let snapshot = page.aria_snapshot_with_frames_and_options(options).await?;
178 /// # Ok(())
179 /// # }
180 /// ```
181 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
182 pub async fn aria_snapshot_with_frames_and_options(
183 &self,
184 options: SnapshotOptions,
185 ) -> Result<AriaSnapshot, PageError> {
186 if self.closed {
187 return Err(PageError::Closed);
188 }
189
190 // Get the main frame snapshot first using Page's method to populate ref_map
191 let mut root_snapshot = self.capture_snapshot_with_refs(options.clone()).await?;
192
193 // Get all frames
194 let frames = self.frames().await?;
195
196 // Filter to non-main frames
197 let child_frames: Vec<_> = frames.iter().filter(|f| !f.is_main()).collect();
198
199 if child_frames.is_empty() {
200 return Ok(root_snapshot);
201 }
202
203 debug!(
204 frame_count = child_frames.len(),
205 "Capturing child frame snapshots in parallel"
206 );
207
208 // Capture all child frame snapshots in parallel, collecting ref mappings
209 let frame_futures: FuturesUnordered<_> = child_frames
210 .iter()
211 .map(|frame| {
212 let frame_id = frame.id().to_string();
213 let frame_url = frame.url().clone();
214 let frame_name = frame.name().clone();
215 let opts = options.clone();
216 async move {
217 // Use capture_snapshot_with_refs to get both snapshot and ref mappings
218 match frame.capture_snapshot_with_refs(opts).await {
219 Ok((snapshot, ref_mappings)) => {
220 Some((frame_id, frame_url, frame_name, snapshot, ref_mappings))
221 }
222 Err(e) => {
223 tracing::warn!(
224 error = %e,
225 frame_id = %frame_id,
226 frame_url = %frame_url,
227 "Failed to capture frame snapshot, skipping"
228 );
229 None
230 }
231 }
232 }
233 })
234 .collect();
235
236 // Collect results
237 let results: Vec<_> = frame_futures.collect().await;
238
239 // Build a map of frame URL/name to captured snapshots
240 let mut frame_snapshots: HashMap<String, AriaSnapshot> = HashMap::new();
241
242 for result in results.into_iter().flatten() {
243 let (frame_id, frame_url, frame_name, snapshot, ref_mappings) = result;
244
245 // Store child frame ref mappings in Page's ref_map for later resolution
246 for (ref_str, backend_node_id) in ref_mappings {
247 self.store_ref_mapping(ref_str, backend_node_id);
248 }
249
250 if !frame_url.is_empty() && frame_url != "about:blank" {
251 frame_snapshots.insert(frame_url, snapshot.clone());
252 }
253 if !frame_name.is_empty() {
254 frame_snapshots.insert(frame_name, snapshot.clone());
255 }
256 // Also store by frame ID
257 frame_snapshots.insert(frame_id, snapshot);
258 }
259
260 // Stitch frame content into the snapshot
261 stitch_frame_content(&mut root_snapshot, &frame_snapshots, 0);
262
263 Ok(root_snapshot)
264 }
265
266 /// Capture an ARIA accessibility snapshot of just the main frame.
267 ///
268 /// This is a convenience method equivalent to calling `main_frame().await?.aria_snapshot().await`.
269 /// Unlike `aria_snapshot_with_frames()`, this does NOT stitch in iframe content -
270 /// iframes are left as boundaries with `is_frame: true`.
271 ///
272 /// # Node References
273 ///
274 /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
275 /// These refs can be used with `element_from_ref()` or `locator_from_ref()` to
276 /// interact with elements discovered in the snapshot.
277 ///
278 /// # Example
279 ///
280 /// ```no_run
281 /// use viewpoint_core::Page;
282 ///
283 /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
284 /// // Quick snapshot without frame content
285 /// let snapshot = page.aria_snapshot().await?;
286 ///
287 /// // Each element has a ref for interaction
288 /// if let Some(ref node_ref) = snapshot.node_ref {
289 /// let locator = page.locator_from_ref(node_ref);
290 /// locator.click().await?;
291 /// }
292 ///
293 /// // Check if there are frame boundaries to expand
294 /// if !snapshot.iframe_refs.is_empty() {
295 /// println!("Page has {} frames that can be expanded", snapshot.iframe_refs.len());
296 /// }
297 /// # Ok(())
298 /// # }
299 /// ```
300 ///
301 /// # Errors
302 ///
303 /// Returns an error if:
304 /// - The page is closed
305 /// - Snapshot capture fails
306 #[instrument(level = "debug", skip(self), fields(target_id = %self.target_id))]
307 pub async fn aria_snapshot(&self) -> Result<AriaSnapshot, PageError> {
308 self.aria_snapshot_with_options(SnapshotOptions::default())
309 .await
310 }
311
312 /// Capture an ARIA accessibility snapshot with custom options.
313 ///
314 /// See [`aria_snapshot`](Self::aria_snapshot) for details.
315 ///
316 /// # Example
317 ///
318 /// ```no_run
319 /// use viewpoint_core::{Page, SnapshotOptions};
320 ///
321 /// # async fn example(page: &Page) -> Result<(), viewpoint_core::CoreError> {
322 /// // Skip ref resolution for maximum performance
323 /// let options = SnapshotOptions::default().include_refs(false);
324 /// let snapshot = page.aria_snapshot_with_options(options).await?;
325 ///
326 /// // Increase concurrency for fast networks
327 /// let options = SnapshotOptions::default().max_concurrency(100);
328 /// let snapshot = page.aria_snapshot_with_options(options).await?;
329 /// # Ok(())
330 /// # }
331 /// ```
332 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
333 pub async fn aria_snapshot_with_options(
334 &self,
335 options: SnapshotOptions,
336 ) -> Result<AriaSnapshot, PageError> {
337 if self.closed {
338 return Err(PageError::Closed);
339 }
340
341 // Capture snapshot with element collection for ref resolution
342 self.capture_snapshot_with_refs(options).await
343 }
344
345 /// Internal method to capture a snapshot with refs resolved.
346 ///
347 /// This uses a two-phase approach:
348 /// 1. JS traversal collects the snapshot and element references
349 /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
350 ///
351 /// # Performance Optimizations
352 ///
353 /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
354 /// - Uses `FuturesUnordered` to resolve node IDs in parallel
355 /// - Configurable concurrency limit to avoid overwhelming the browser
356 #[instrument(level = "debug", skip(self, options), fields(target_id = %self.target_id))]
357 async fn capture_snapshot_with_refs(
358 &self,
359 options: SnapshotOptions,
360 ) -> Result<AriaSnapshot, PageError> {
361 let snapshot_fn = aria_snapshot_with_refs_js();
362
363 // Evaluate the JS function to get snapshot and element array
364 // We return by value for the snapshot, but need remote objects for elements
365 let js_code = js! {
366 (function() {
367 const getSnapshotWithRefs = @{snapshot_fn};
368 return getSnapshotWithRefs(document.body);
369 })()
370 };
371
372 // First, evaluate to get the result as a RemoteObject (not by value)
373 // so we can access the elements array
374 let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
375 .connection()
376 .send_command(
377 "Runtime.evaluate",
378 Some(viewpoint_cdp::protocol::runtime::EvaluateParams {
379 expression: js_code,
380 object_group: Some("viewpoint-snapshot".to_string()),
381 include_command_line_api: None,
382 silent: Some(true),
383 context_id: None,
384 return_by_value: Some(false), // Get RemoteObject, not value
385 await_promise: Some(false),
386 }),
387 Some(self.session_id()),
388 )
389 .await?;
390
391 if let Some(exception) = result.exception_details {
392 return Err(PageError::EvaluationFailed(exception.text));
393 }
394
395 let result_object_id = result.result.object_id.ok_or_else(|| {
396 PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
397 })?;
398
399 // Get the snapshot property (by value)
400 let snapshot_value = self
401 .get_property_value(&result_object_id, "snapshot")
402 .await?;
403
404 // Parse the snapshot
405 let mut snapshot: AriaSnapshot = serde_json::from_value(snapshot_value).map_err(|e| {
406 PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
407 })?;
408
409 // Clear any previous ref mappings before populating new ones
410 self.clear_ref_map();
411
412 // Only resolve refs if requested
413 if options.include_refs {
414 // Get the elements array as a RemoteObject
415 let elements_result = self
416 .get_property_object(&result_object_id, "elements")
417 .await?;
418
419 if let Some(elements_object_id) = elements_result {
420 // Batch-fetch all array element object IDs using Runtime.getProperties
421 let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
422 let element_count = element_object_ids.len();
423
424 debug!(
425 element_count = element_count,
426 max_concurrency = options.max_concurrency,
427 "Resolving element refs in parallel"
428 );
429
430 // Resolve all node IDs in parallel with concurrency limit
431 let index_to_backend_id = self
432 .resolve_node_ids_parallel(element_object_ids, options.max_concurrency)
433 .await;
434
435 debug!(
436 resolved_count = index_to_backend_id.len(),
437 total_count = element_count,
438 "Completed parallel ref resolution"
439 );
440
441 // Apply refs to the snapshot tree and get ref-to-backendId mappings
442 // Frame index 0 = main frame (Page captures always use the main frame)
443 let ref_to_backend_id = apply_refs_to_snapshot(
444 &mut snapshot,
445 &index_to_backend_id,
446 self.context_index,
447 self.page_index,
448 0, // main frame
449 );
450
451 // Store the ref mappings for later resolution
452 for (ref_str, backend_node_id) in ref_to_backend_id {
453 self.store_ref_mapping(ref_str, backend_node_id);
454 }
455
456 // Release the elements array to free memory
457 let _ = self.release_object(&elements_object_id).await;
458 }
459 }
460
461 // Release the result object
462 let _ = self.release_object(&result_object_id).await;
463
464 Ok(snapshot)
465 }
466}