viewpoint_core/page/frame/
aria.rs

1//! Frame ARIA accessibility snapshot operations.
2
3use std::collections::HashMap;
4
5use futures::stream::{FuturesUnordered, StreamExt};
6use tracing::{debug, instrument, trace};
7use viewpoint_cdp::protocol::dom::{BackendNodeId, DescribeNodeParams, DescribeNodeResult};
8use viewpoint_cdp::protocol::runtime::EvaluateParams;
9use viewpoint_js::js;
10
11use super::Frame;
12use crate::error::PageError;
13use crate::page::aria_snapshot::{SnapshotOptions, apply_refs_to_snapshot};
14use crate::page::locator::aria_js::aria_snapshot_with_refs_js;
15
16impl Frame {
17    /// Capture an ARIA accessibility snapshot of this frame's document.
18    ///
19    /// The snapshot represents the accessible structure of the frame's content
20    /// as it would be exposed to assistive technologies. This is useful for
21    /// accessibility testing and MCP (Model Context Protocol) integrations.
22    ///
23    /// # Node References
24    ///
25    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
26    /// These refs can be used with `Page::element_from_ref()` or `Page::locator_from_ref()`
27    /// to interact with elements discovered in the snapshot.
28    ///
29    /// # Frame Boundaries
30    ///
31    /// Any iframes within this frame are marked as frame boundaries in the snapshot
32    /// with `is_frame: true`. Their content is NOT traversed (for security reasons).
33    /// To capture multi-frame accessibility trees, use `Page::aria_snapshot_with_frames()`.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if:
38    /// - The frame is detached
39    /// - JavaScript evaluation fails
40    /// - The snapshot cannot be parsed
41    #[instrument(level = "debug", skip(self), fields(frame_id = %self.id))]
42    pub async fn aria_snapshot(&self) -> Result<crate::page::locator::AriaSnapshot, PageError> {
43        self.aria_snapshot_with_options(SnapshotOptions::default())
44            .await
45    }
46
47    /// Capture an ARIA accessibility snapshot with custom options.
48    ///
49    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
50    ///
51    /// # Example
52    ///
53    /// ```no_run
54    /// use viewpoint_core::SnapshotOptions;
55    ///
56    /// # async fn example(frame: &viewpoint_core::Frame) -> Result<(), viewpoint_core::CoreError> {
57    /// // Skip ref resolution for maximum performance
58    /// let options = SnapshotOptions::default().include_refs(false);
59    /// let snapshot = frame.aria_snapshot_with_options(options).await?;
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
64    pub async fn aria_snapshot_with_options(
65        &self,
66        options: SnapshotOptions,
67    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
68        if self.is_detached() {
69            return Err(PageError::EvaluationFailed("Frame is detached".to_string()));
70        }
71
72        // Capture snapshot with element collection for ref resolution, discarding the ref mappings
73        // (Frame's public API doesn't return them - use Page.aria_snapshot_with_frames() for full ref support)
74        let (snapshot, _ref_mappings) = self.capture_snapshot_with_refs(options).await?;
75        Ok(snapshot)
76    }
77
78    /// Internal method to capture a snapshot with refs resolved.
79    ///
80    /// This uses a two-phase approach:
81    /// 1. JS traversal collects the snapshot and element references
82    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
83    ///
84    /// Returns both the snapshot and a map of ref strings to their backendNodeIds.
85    /// The caller should store these mappings in Page's ref_map for later resolution.
86    ///
87    /// # Performance Optimizations
88    ///
89    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
90    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
91    /// - Configurable concurrency limit to avoid overwhelming the browser
92    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
93    pub(crate) async fn capture_snapshot_with_refs(
94        &self,
95        options: SnapshotOptions,
96    ) -> Result<
97        (
98            crate::page::locator::AriaSnapshot,
99            HashMap<String, BackendNodeId>,
100        ),
101        PageError,
102    > {
103        let snapshot_fn = aria_snapshot_with_refs_js();
104
105        // Evaluate the JS function to get snapshot and element array
106        // We return by value for the snapshot, but need remote objects for elements
107        let js_code = js! {
108            (function() {
109                const getSnapshotWithRefs = @{snapshot_fn};
110                return getSnapshotWithRefs(document.body);
111            })()
112        };
113
114        // Get the execution context ID for this frame's main world
115        let context_id = self.main_world_context_id();
116        trace!(context_id = ?context_id, "Using execution context for aria_snapshot()");
117
118        // First, evaluate to get the result as a RemoteObject (not by value)
119        // so we can access the elements array
120        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
121            .connection
122            .send_command(
123                "Runtime.evaluate",
124                Some(EvaluateParams {
125                    expression: js_code,
126                    object_group: Some("viewpoint-snapshot".to_string()),
127                    include_command_line_api: None,
128                    silent: Some(true),
129                    context_id,
130                    return_by_value: Some(false), // Get RemoteObject, not value
131                    await_promise: Some(false),
132                }),
133                Some(&self.session_id),
134            )
135            .await?;
136
137        if let Some(exception) = result.exception_details {
138            return Err(PageError::EvaluationFailed(exception.text));
139        }
140
141        let result_object_id = result.result.object_id.ok_or_else(|| {
142            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
143        })?;
144
145        // Get the snapshot property (by value)
146        let snapshot_value = self
147            .get_property_value(&result_object_id, "snapshot")
148            .await?;
149
150        // Parse the snapshot
151        let mut snapshot: crate::page::locator::AriaSnapshot =
152            serde_json::from_value(snapshot_value).map_err(|e| {
153                PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
154            })?;
155
156        // Collect ref mappings to return to caller
157        let mut ref_mappings: HashMap<String, BackendNodeId> = HashMap::new();
158
159        // Only resolve refs if requested
160        if options.get_include_refs() {
161            // Get the elements array as a RemoteObject
162            let elements_result = self
163                .get_property_object(&result_object_id, "elements")
164                .await?;
165
166            if let Some(elements_object_id) = elements_result {
167                // Batch-fetch all array element object IDs using Runtime.getProperties
168                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
169                let element_count = element_object_ids.len();
170
171                debug!(
172                    element_count = element_count,
173                    max_concurrency = options.get_max_concurrency(),
174                    "Resolving element refs in parallel"
175                );
176
177                // Resolve all node IDs in parallel with concurrency limit
178                let index_to_backend_id = self
179                    .resolve_node_ids_parallel(element_object_ids, options.get_max_concurrency())
180                    .await;
181
182                debug!(
183                    resolved_count = index_to_backend_id.len(),
184                    total_count = element_count,
185                    "Completed parallel ref resolution"
186                );
187
188                // Apply refs to the snapshot tree and collect the ref-to-backendNodeId mappings
189                ref_mappings = apply_refs_to_snapshot(
190                    &mut snapshot,
191                    &index_to_backend_id,
192                    self.context_index,
193                    self.page_index,
194                    self.frame_index,
195                );
196
197                // Release the elements array to free memory
198                let _ = self.release_object(&elements_object_id).await;
199            }
200        }
201
202        // Release the result object
203        let _ = self.release_object(&result_object_id).await;
204
205        Ok((snapshot, ref_mappings))
206    }
207
208    /// Batch-fetch all array element object IDs using `Runtime.getProperties`.
209    ///
210    /// This replaces N individual `get_array_element()` calls with a single CDP call,
211    /// significantly reducing round-trips for large arrays.
212    async fn get_all_array_elements(
213        &self,
214        array_object_id: &str,
215    ) -> Result<Vec<(usize, String)>, PageError> {
216        #[derive(Debug, serde::Deserialize)]
217        struct PropertyDescriptor {
218            name: String,
219            value: Option<viewpoint_cdp::protocol::runtime::RemoteObject>,
220        }
221
222        #[derive(Debug, serde::Deserialize)]
223        struct GetPropertiesResult {
224            result: Vec<PropertyDescriptor>,
225        }
226
227        let result: GetPropertiesResult = self
228            .connection
229            .send_command(
230                "Runtime.getProperties",
231                Some(serde_json::json!({
232                    "objectId": array_object_id,
233                    "ownProperties": true,
234                    "generatePreview": false
235                })),
236                Some(&self.session_id),
237            )
238            .await?;
239
240        // Filter to numeric indices and extract object IDs
241        let mut elements: Vec<(usize, String)> = Vec::new();
242
243        for prop in result.result {
244            // Parse numeric indices (array elements)
245            if let Ok(index) = prop.name.parse::<usize>() {
246                if let Some(value) = prop.value {
247                    if let Some(object_id) = value.object_id {
248                        elements.push((index, object_id));
249                    }
250                }
251            }
252        }
253
254        // Sort by index to maintain order
255        elements.sort_by_key(|(index, _)| *index);
256
257        trace!(
258            element_count = elements.len(),
259            "Batch-fetched array elements"
260        );
261
262        Ok(elements)
263    }
264
265    /// Resolve node IDs in parallel with a concurrency limit.
266    ///
267    /// Uses chunked processing with `FuturesUnordered` to limit concurrency
268    /// and avoid overwhelming the browser's CDP connection.
269    async fn resolve_node_ids_parallel(
270        &self,
271        element_object_ids: Vec<(usize, String)>,
272        max_concurrency: usize,
273    ) -> HashMap<usize, BackendNodeId> {
274        let mut ref_map = HashMap::new();
275
276        // Process in chunks to limit concurrency
277        for chunk in element_object_ids.chunks(max_concurrency) {
278            let futures: FuturesUnordered<_> = chunk
279                .iter()
280                .map(|(index, object_id)| {
281                    let index = *index;
282                    let object_id = object_id.clone();
283                    async move {
284                        match self.describe_node(&object_id).await {
285                            Ok(backend_node_id) => {
286                                trace!(
287                                    index = index,
288                                    backend_node_id = backend_node_id,
289                                    "Resolved element ref"
290                                );
291                                Some((index, backend_node_id))
292                            }
293                            Err(e) => {
294                                debug!(index = index, error = %e, "Failed to get backendNodeId for element");
295                                None
296                            }
297                        }
298                    }
299                })
300                .collect();
301
302            // Collect all results from this chunk
303            let results: Vec<_> = futures.collect().await;
304            for result in results.into_iter().flatten() {
305                ref_map.insert(result.0, result.1);
306            }
307        }
308
309        ref_map
310    }
311
312    /// Get a property value from a RemoteObject by name.
313    pub(super) async fn get_property_value(
314        &self,
315        object_id: &str,
316        property: &str,
317    ) -> Result<serde_json::Value, PageError> {
318        #[derive(Debug, serde::Deserialize)]
319        struct CallResult {
320            result: viewpoint_cdp::protocol::runtime::RemoteObject,
321        }
322
323        let js_fn = js! {
324            (function() { return this[#{property}]; })
325        };
326        // Strip outer parentheses for CDP functionDeclaration
327        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
328
329        let result: CallResult = self
330            .connection
331            .send_command(
332                "Runtime.callFunctionOn",
333                Some(serde_json::json!({
334                    "objectId": object_id,
335                    "functionDeclaration": function_declaration,
336                    "returnByValue": true
337                })),
338                Some(&self.session_id),
339            )
340            .await?;
341
342        Ok(result.result.value.unwrap_or(serde_json::Value::Null))
343    }
344
345    /// Get a property as a RemoteObject from a RemoteObject by name.
346    pub(super) async fn get_property_object(
347        &self,
348        object_id: &str,
349        property: &str,
350    ) -> Result<Option<String>, PageError> {
351        #[derive(Debug, serde::Deserialize)]
352        struct CallResult {
353            result: viewpoint_cdp::protocol::runtime::RemoteObject,
354        }
355
356        let js_fn = js! {
357            (function() { return this[#{property}]; })
358        };
359        // Strip outer parentheses for CDP functionDeclaration
360        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
361
362        let result: CallResult = self
363            .connection
364            .send_command(
365                "Runtime.callFunctionOn",
366                Some(serde_json::json!({
367                    "objectId": object_id,
368                    "functionDeclaration": function_declaration,
369                    "returnByValue": false
370                })),
371                Some(&self.session_id),
372            )
373            .await?;
374
375        Ok(result.result.object_id)
376    }
377
378    /// Get the backendNodeId for an element by its object ID.
379    pub(super) async fn describe_node(&self, object_id: &str) -> Result<BackendNodeId, PageError> {
380        let result: DescribeNodeResult = self
381            .connection
382            .send_command(
383                "DOM.describeNode",
384                Some(DescribeNodeParams {
385                    node_id: None,
386                    backend_node_id: None,
387                    object_id: Some(object_id.to_string()),
388                    depth: Some(0),
389                    pierce: None,
390                }),
391                Some(&self.session_id),
392            )
393            .await?;
394
395        Ok(result.node.backend_node_id)
396    }
397
398    /// Release a RemoteObject by its object ID.
399    pub(super) async fn release_object(&self, object_id: &str) -> Result<(), PageError> {
400        let _: serde_json::Value = self
401            .connection
402            .send_command(
403                "Runtime.releaseObject",
404                Some(serde_json::json!({
405                    "objectId": object_id
406                })),
407                Some(&self.session_id),
408            )
409            .await?;
410
411        Ok(())
412    }
413}