viewpoint_core/page/frame/
aria.rs

1//! Frame ARIA accessibility snapshot operations.
2
3use std::collections::HashMap;
4
5use futures::stream::{FuturesUnordered, StreamExt};
6use tracing::{debug, instrument, trace};
7use viewpoint_cdp::protocol::dom::{BackendNodeId, DescribeNodeParams, DescribeNodeResult};
8use viewpoint_cdp::protocol::runtime::EvaluateParams;
9use viewpoint_js::js;
10
11use super::Frame;
12use crate::error::PageError;
13use crate::page::aria_snapshot::{SnapshotOptions, apply_refs_to_snapshot};
14use crate::page::locator::aria_js::aria_snapshot_with_refs_js;
15
16impl Frame {
17    /// Capture an ARIA accessibility snapshot of this frame's document.
18    ///
19    /// The snapshot represents the accessible structure of the frame's content
20    /// as it would be exposed to assistive technologies. This is useful for
21    /// accessibility testing and MCP (Model Context Protocol) integrations.
22    ///
23    /// # Node References
24    ///
25    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
26    /// These refs can be used with `Page::element_from_ref()` or `Page::locator_from_ref()`
27    /// to interact with elements discovered in the snapshot.
28    ///
29    /// # Frame Boundaries
30    ///
31    /// Any iframes within this frame are marked as frame boundaries in the snapshot
32    /// with `is_frame: true`. Their content is NOT traversed (for security reasons).
33    /// To capture multi-frame accessibility trees, use `Page::aria_snapshot_with_frames()`.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if:
38    /// - The frame is detached
39    /// - JavaScript evaluation fails
40    /// - The snapshot cannot be parsed
41    #[instrument(level = "debug", skip(self), fields(frame_id = %self.id))]
42    pub async fn aria_snapshot(&self) -> Result<crate::page::locator::AriaSnapshot, PageError> {
43        self.aria_snapshot_with_options(SnapshotOptions::default())
44            .await
45    }
46
47    /// Capture an ARIA accessibility snapshot with custom options.
48    ///
49    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
50    ///
51    /// # Example
52    ///
53    /// ```no_run
54    /// use viewpoint_core::SnapshotOptions;
55    ///
56    /// # async fn example(frame: &viewpoint_core::Frame) -> Result<(), viewpoint_core::CoreError> {
57    /// // Skip ref resolution for maximum performance
58    /// let options = SnapshotOptions::default().include_refs(false);
59    /// let snapshot = frame.aria_snapshot_with_options(options).await?;
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
64    pub async fn aria_snapshot_with_options(
65        &self,
66        options: SnapshotOptions,
67    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
68        if self.is_detached() {
69            return Err(PageError::EvaluationFailed("Frame is detached".to_string()));
70        }
71
72        // Capture snapshot with element collection for ref resolution, discarding the ref mappings
73        // (Frame's public API doesn't return them - use Page.aria_snapshot_with_frames() for full ref support)
74        let (snapshot, _ref_mappings) = self.capture_snapshot_with_refs(options).await?;
75        Ok(snapshot)
76    }
77
78    /// Internal method to capture a snapshot with refs resolved.
79    ///
80    /// This uses a two-phase approach:
81    /// 1. JS traversal collects the snapshot and element references
82    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
83    ///
84    /// Returns both the snapshot and a map of ref strings to their backendNodeIds.
85    /// The caller should store these mappings in Page's ref_map for later resolution.
86    ///
87    /// # Performance Optimizations
88    ///
89    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
90    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
91    /// - Configurable concurrency limit to avoid overwhelming the browser
92    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
93    pub(crate) async fn capture_snapshot_with_refs(
94        &self,
95        options: SnapshotOptions,
96    ) -> Result<(crate::page::locator::AriaSnapshot, HashMap<String, BackendNodeId>), PageError> {
97        let snapshot_fn = aria_snapshot_with_refs_js();
98
99        // Evaluate the JS function to get snapshot and element array
100        // We return by value for the snapshot, but need remote objects for elements
101        let js_code = js! {
102            (function() {
103                const getSnapshotWithRefs = @{snapshot_fn};
104                return getSnapshotWithRefs(document.body);
105            })()
106        };
107
108        // Get the execution context ID for this frame's main world
109        let context_id = self.main_world_context_id();
110        trace!(context_id = ?context_id, "Using execution context for aria_snapshot()");
111
112        // First, evaluate to get the result as a RemoteObject (not by value)
113        // so we can access the elements array
114        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
115            .connection
116            .send_command(
117                "Runtime.evaluate",
118                Some(EvaluateParams {
119                    expression: js_code,
120                    object_group: Some("viewpoint-snapshot".to_string()),
121                    include_command_line_api: None,
122                    silent: Some(true),
123                    context_id,
124                    return_by_value: Some(false), // Get RemoteObject, not value
125                    await_promise: Some(false),
126                }),
127                Some(&self.session_id),
128            )
129            .await?;
130
131        if let Some(exception) = result.exception_details {
132            return Err(PageError::EvaluationFailed(exception.text));
133        }
134
135        let result_object_id = result.result.object_id.ok_or_else(|| {
136            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
137        })?;
138
139        // Get the snapshot property (by value)
140        let snapshot_value = self
141            .get_property_value(&result_object_id, "snapshot")
142            .await?;
143
144        // Parse the snapshot
145        let mut snapshot: crate::page::locator::AriaSnapshot =
146            serde_json::from_value(snapshot_value).map_err(|e| {
147                PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
148            })?;
149
150        // Collect ref mappings to return to caller
151        let mut ref_mappings: HashMap<String, BackendNodeId> = HashMap::new();
152
153        // Only resolve refs if requested
154        if options.get_include_refs() {
155            // Get the elements array as a RemoteObject
156            let elements_result = self
157                .get_property_object(&result_object_id, "elements")
158                .await?;
159
160            if let Some(elements_object_id) = elements_result {
161                // Batch-fetch all array element object IDs using Runtime.getProperties
162                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
163                let element_count = element_object_ids.len();
164
165                debug!(
166                    element_count = element_count,
167                    max_concurrency = options.get_max_concurrency(),
168                    "Resolving element refs in parallel"
169                );
170
171                // Resolve all node IDs in parallel with concurrency limit
172                let index_to_backend_id = self
173                    .resolve_node_ids_parallel(element_object_ids, options.get_max_concurrency())
174                    .await;
175
176                debug!(
177                    resolved_count = index_to_backend_id.len(),
178                    total_count = element_count,
179                    "Completed parallel ref resolution"
180                );
181
182                // Apply refs to the snapshot tree and collect the ref-to-backendNodeId mappings
183                ref_mappings = apply_refs_to_snapshot(
184                    &mut snapshot,
185                    &index_to_backend_id,
186                    self.context_index,
187                    self.page_index,
188                    self.frame_index,
189                );
190
191                // Release the elements array to free memory
192                let _ = self.release_object(&elements_object_id).await;
193            }
194        }
195
196        // Release the result object
197        let _ = self.release_object(&result_object_id).await;
198
199        Ok((snapshot, ref_mappings))
200    }
201
202    /// Batch-fetch all array element object IDs using `Runtime.getProperties`.
203    ///
204    /// This replaces N individual `get_array_element()` calls with a single CDP call,
205    /// significantly reducing round-trips for large arrays.
206    async fn get_all_array_elements(
207        &self,
208        array_object_id: &str,
209    ) -> Result<Vec<(usize, String)>, PageError> {
210        #[derive(Debug, serde::Deserialize)]
211        struct PropertyDescriptor {
212            name: String,
213            value: Option<viewpoint_cdp::protocol::runtime::RemoteObject>,
214        }
215
216        #[derive(Debug, serde::Deserialize)]
217        struct GetPropertiesResult {
218            result: Vec<PropertyDescriptor>,
219        }
220
221        let result: GetPropertiesResult = self
222            .connection
223            .send_command(
224                "Runtime.getProperties",
225                Some(serde_json::json!({
226                    "objectId": array_object_id,
227                    "ownProperties": true,
228                    "generatePreview": false
229                })),
230                Some(&self.session_id),
231            )
232            .await?;
233
234        // Filter to numeric indices and extract object IDs
235        let mut elements: Vec<(usize, String)> = Vec::new();
236
237        for prop in result.result {
238            // Parse numeric indices (array elements)
239            if let Ok(index) = prop.name.parse::<usize>() {
240                if let Some(value) = prop.value {
241                    if let Some(object_id) = value.object_id {
242                        elements.push((index, object_id));
243                    }
244                }
245            }
246        }
247
248        // Sort by index to maintain order
249        elements.sort_by_key(|(index, _)| *index);
250
251        trace!(
252            element_count = elements.len(),
253            "Batch-fetched array elements"
254        );
255
256        Ok(elements)
257    }
258
259    /// Resolve node IDs in parallel with a concurrency limit.
260    ///
261    /// Uses chunked processing with `FuturesUnordered` to limit concurrency
262    /// and avoid overwhelming the browser's CDP connection.
263    async fn resolve_node_ids_parallel(
264        &self,
265        element_object_ids: Vec<(usize, String)>,
266        max_concurrency: usize,
267    ) -> HashMap<usize, BackendNodeId> {
268        let mut ref_map = HashMap::new();
269
270        // Process in chunks to limit concurrency
271        for chunk in element_object_ids.chunks(max_concurrency) {
272            let futures: FuturesUnordered<_> = chunk
273                .iter()
274                .map(|(index, object_id)| {
275                    let index = *index;
276                    let object_id = object_id.clone();
277                    async move {
278                        match self.describe_node(&object_id).await {
279                            Ok(backend_node_id) => {
280                                trace!(
281                                    index = index,
282                                    backend_node_id = backend_node_id,
283                                    "Resolved element ref"
284                                );
285                                Some((index, backend_node_id))
286                            }
287                            Err(e) => {
288                                debug!(index = index, error = %e, "Failed to get backendNodeId for element");
289                                None
290                            }
291                        }
292                    }
293                })
294                .collect();
295
296            // Collect all results from this chunk
297            let results: Vec<_> = futures.collect().await;
298            for result in results.into_iter().flatten() {
299                ref_map.insert(result.0, result.1);
300            }
301        }
302
303        ref_map
304    }
305
306    /// Get a property value from a RemoteObject by name.
307    pub(super) async fn get_property_value(
308        &self,
309        object_id: &str,
310        property: &str,
311    ) -> Result<serde_json::Value, PageError> {
312        #[derive(Debug, serde::Deserialize)]
313        struct CallResult {
314            result: viewpoint_cdp::protocol::runtime::RemoteObject,
315        }
316
317        let js_fn = js! {
318            (function() { return this[#{property}]; })
319        };
320        // Strip outer parentheses for CDP functionDeclaration
321        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
322
323        let result: CallResult = self
324            .connection
325            .send_command(
326                "Runtime.callFunctionOn",
327                Some(serde_json::json!({
328                    "objectId": object_id,
329                    "functionDeclaration": function_declaration,
330                    "returnByValue": true
331                })),
332                Some(&self.session_id),
333            )
334            .await?;
335
336        Ok(result.result.value.unwrap_or(serde_json::Value::Null))
337    }
338
339    /// Get a property as a RemoteObject from a RemoteObject by name.
340    pub(super) async fn get_property_object(
341        &self,
342        object_id: &str,
343        property: &str,
344    ) -> Result<Option<String>, PageError> {
345        #[derive(Debug, serde::Deserialize)]
346        struct CallResult {
347            result: viewpoint_cdp::protocol::runtime::RemoteObject,
348        }
349
350        let js_fn = js! {
351            (function() { return this[#{property}]; })
352        };
353        // Strip outer parentheses for CDP functionDeclaration
354        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
355
356        let result: CallResult = self
357            .connection
358            .send_command(
359                "Runtime.callFunctionOn",
360                Some(serde_json::json!({
361                    "objectId": object_id,
362                    "functionDeclaration": function_declaration,
363                    "returnByValue": false
364                })),
365                Some(&self.session_id),
366            )
367            .await?;
368
369        Ok(result.result.object_id)
370    }
371
372    /// Get the backendNodeId for an element by its object ID.
373    pub(super) async fn describe_node(&self, object_id: &str) -> Result<BackendNodeId, PageError> {
374        let result: DescribeNodeResult = self
375            .connection
376            .send_command(
377                "DOM.describeNode",
378                Some(DescribeNodeParams {
379                    node_id: None,
380                    backend_node_id: None,
381                    object_id: Some(object_id.to_string()),
382                    depth: Some(0),
383                    pierce: None,
384                }),
385                Some(&self.session_id),
386            )
387            .await?;
388
389        Ok(result.node.backend_node_id)
390    }
391
392    /// Release a RemoteObject by its object ID.
393    pub(super) async fn release_object(&self, object_id: &str) -> Result<(), PageError> {
394        let _: serde_json::Value = self
395            .connection
396            .send_command(
397                "Runtime.releaseObject",
398                Some(serde_json::json!({
399                    "objectId": object_id
400                })),
401                Some(&self.session_id),
402            )
403            .await?;
404
405        Ok(())
406    }
407}