viewpoint_core/page/frame/
aria.rs

1//! Frame ARIA accessibility snapshot operations.
2
3use std::collections::HashMap;
4
5use futures::stream::{FuturesUnordered, StreamExt};
6use tracing::{debug, instrument, trace};
7use viewpoint_cdp::protocol::dom::{BackendNodeId, DescribeNodeParams, DescribeNodeResult};
8use viewpoint_cdp::protocol::runtime::EvaluateParams;
9use viewpoint_js::js;
10
11use super::Frame;
12use crate::error::PageError;
13use crate::page::aria_snapshot::{SnapshotOptions, apply_refs_to_snapshot};
14use crate::page::locator::aria_js::aria_snapshot_with_refs_js;
15
16impl Frame {
17    /// Capture an ARIA accessibility snapshot of this frame's document.
18    ///
19    /// The snapshot represents the accessible structure of the frame's content
20    /// as it would be exposed to assistive technologies. This is useful for
21    /// accessibility testing and MCP (Model Context Protocol) integrations.
22    ///
23    /// # Node References
24    ///
25    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
26    /// These refs can be used with `Page::element_from_ref()` or `Page::locator_from_ref()`
27    /// to interact with elements discovered in the snapshot.
28    ///
29    /// # Frame Boundaries
30    ///
31    /// Any iframes within this frame are marked as frame boundaries in the snapshot
32    /// with `is_frame: true`. Their content is NOT traversed (for security reasons).
33    /// To capture multi-frame accessibility trees, use `Page::aria_snapshot_with_frames()`.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if:
38    /// - The frame is detached
39    /// - JavaScript evaluation fails
40    /// - The snapshot cannot be parsed
41    #[instrument(level = "debug", skip(self), fields(frame_id = %self.id))]
42    pub async fn aria_snapshot(&self) -> Result<crate::page::locator::AriaSnapshot, PageError> {
43        self.aria_snapshot_with_options(SnapshotOptions::default())
44            .await
45    }
46
47    /// Capture an ARIA accessibility snapshot with custom options.
48    ///
49    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
50    ///
51    /// # Example
52    ///
53    /// ```no_run
54    /// use viewpoint_core::SnapshotOptions;
55    ///
56    /// # async fn example(frame: &viewpoint_core::Frame) -> Result<(), viewpoint_core::CoreError> {
57    /// // Skip ref resolution for maximum performance
58    /// let options = SnapshotOptions::default().include_refs(false);
59    /// let snapshot = frame.aria_snapshot_with_options(options).await?;
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
64    pub async fn aria_snapshot_with_options(
65        &self,
66        options: SnapshotOptions,
67    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
68        if self.is_detached() {
69            return Err(PageError::EvaluationFailed("Frame is detached".to_string()));
70        }
71
72        // Capture snapshot with element collection for ref resolution
73        self.capture_snapshot_with_refs(options).await
74    }
75
76    /// Internal method to capture a snapshot with refs resolved.
77    ///
78    /// This uses a two-phase approach:
79    /// 1. JS traversal collects the snapshot and element references
80    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
81    ///
82    /// # Performance Optimizations
83    ///
84    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
85    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
86    /// - Configurable concurrency limit to avoid overwhelming the browser
87    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
88    pub(super) async fn capture_snapshot_with_refs(
89        &self,
90        options: SnapshotOptions,
91    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
92        let snapshot_fn = aria_snapshot_with_refs_js();
93
94        // Evaluate the JS function to get snapshot and element array
95        // We return by value for the snapshot, but need remote objects for elements
96        let js_code = js! {
97            (function() {
98                const getSnapshotWithRefs = @{snapshot_fn};
99                return getSnapshotWithRefs(document.body);
100            })()
101        };
102
103        // Get the execution context ID for this frame's main world
104        let context_id = self.main_world_context_id();
105        trace!(context_id = ?context_id, "Using execution context for aria_snapshot()");
106
107        // First, evaluate to get the result as a RemoteObject (not by value)
108        // so we can access the elements array
109        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
110            .connection
111            .send_command(
112                "Runtime.evaluate",
113                Some(EvaluateParams {
114                    expression: js_code,
115                    object_group: Some("viewpoint-snapshot".to_string()),
116                    include_command_line_api: None,
117                    silent: Some(true),
118                    context_id,
119                    return_by_value: Some(false), // Get RemoteObject, not value
120                    await_promise: Some(false),
121                }),
122                Some(&self.session_id),
123            )
124            .await?;
125
126        if let Some(exception) = result.exception_details {
127            return Err(PageError::EvaluationFailed(exception.text));
128        }
129
130        let result_object_id = result.result.object_id.ok_or_else(|| {
131            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
132        })?;
133
134        // Get the snapshot property (by value)
135        let snapshot_value = self
136            .get_property_value(&result_object_id, "snapshot")
137            .await?;
138
139        // Parse the snapshot
140        let mut snapshot: crate::page::locator::AriaSnapshot =
141            serde_json::from_value(snapshot_value).map_err(|e| {
142                PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
143            })?;
144
145        // Only resolve refs if requested
146        if options.get_include_refs() {
147            // Get the elements array as a RemoteObject
148            let elements_result = self
149                .get_property_object(&result_object_id, "elements")
150                .await?;
151
152            if let Some(elements_object_id) = elements_result {
153                // Batch-fetch all array element object IDs using Runtime.getProperties
154                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
155                let element_count = element_object_ids.len();
156
157                debug!(
158                    element_count = element_count,
159                    max_concurrency = options.get_max_concurrency(),
160                    "Resolving element refs in parallel"
161                );
162
163                // Resolve all node IDs in parallel with concurrency limit
164                let ref_map = self
165                    .resolve_node_ids_parallel(element_object_ids, options.get_max_concurrency())
166                    .await;
167
168                debug!(
169                    resolved_count = ref_map.len(),
170                    total_count = element_count,
171                    "Completed parallel ref resolution"
172                );
173
174                // Apply refs to the snapshot tree
175                // Note: Frame doesn't have access to Page's ref_map, so we discard
176                // the returned mappings. Refs captured via Frame are visible in the
177                // snapshot but not resolvable via page.locator_from_ref().
178                // Use page.aria_snapshot() instead for full ref support.
179                let _ = apply_refs_to_snapshot(
180                    &mut snapshot,
181                    &ref_map,
182                    self.context_index,
183                    self.page_index,
184                    self.frame_index,
185                );
186
187                // Release the elements array to free memory
188                let _ = self.release_object(&elements_object_id).await;
189            }
190        }
191
192        // Release the result object
193        let _ = self.release_object(&result_object_id).await;
194
195        Ok(snapshot)
196    }
197
198    /// Batch-fetch all array element object IDs using `Runtime.getProperties`.
199    ///
200    /// This replaces N individual `get_array_element()` calls with a single CDP call,
201    /// significantly reducing round-trips for large arrays.
202    async fn get_all_array_elements(
203        &self,
204        array_object_id: &str,
205    ) -> Result<Vec<(usize, String)>, PageError> {
206        #[derive(Debug, serde::Deserialize)]
207        struct PropertyDescriptor {
208            name: String,
209            value: Option<viewpoint_cdp::protocol::runtime::RemoteObject>,
210        }
211
212        #[derive(Debug, serde::Deserialize)]
213        struct GetPropertiesResult {
214            result: Vec<PropertyDescriptor>,
215        }
216
217        let result: GetPropertiesResult = self
218            .connection
219            .send_command(
220                "Runtime.getProperties",
221                Some(serde_json::json!({
222                    "objectId": array_object_id,
223                    "ownProperties": true,
224                    "generatePreview": false
225                })),
226                Some(&self.session_id),
227            )
228            .await?;
229
230        // Filter to numeric indices and extract object IDs
231        let mut elements: Vec<(usize, String)> = Vec::new();
232
233        for prop in result.result {
234            // Parse numeric indices (array elements)
235            if let Ok(index) = prop.name.parse::<usize>() {
236                if let Some(value) = prop.value {
237                    if let Some(object_id) = value.object_id {
238                        elements.push((index, object_id));
239                    }
240                }
241            }
242        }
243
244        // Sort by index to maintain order
245        elements.sort_by_key(|(index, _)| *index);
246
247        trace!(
248            element_count = elements.len(),
249            "Batch-fetched array elements"
250        );
251
252        Ok(elements)
253    }
254
255    /// Resolve node IDs in parallel with a concurrency limit.
256    ///
257    /// Uses chunked processing with `FuturesUnordered` to limit concurrency
258    /// and avoid overwhelming the browser's CDP connection.
259    async fn resolve_node_ids_parallel(
260        &self,
261        element_object_ids: Vec<(usize, String)>,
262        max_concurrency: usize,
263    ) -> HashMap<usize, BackendNodeId> {
264        let mut ref_map = HashMap::new();
265
266        // Process in chunks to limit concurrency
267        for chunk in element_object_ids.chunks(max_concurrency) {
268            let futures: FuturesUnordered<_> = chunk
269                .iter()
270                .map(|(index, object_id)| {
271                    let index = *index;
272                    let object_id = object_id.clone();
273                    async move {
274                        match self.describe_node(&object_id).await {
275                            Ok(backend_node_id) => {
276                                trace!(
277                                    index = index,
278                                    backend_node_id = backend_node_id,
279                                    "Resolved element ref"
280                                );
281                                Some((index, backend_node_id))
282                            }
283                            Err(e) => {
284                                debug!(index = index, error = %e, "Failed to get backendNodeId for element");
285                                None
286                            }
287                        }
288                    }
289                })
290                .collect();
291
292            // Collect all results from this chunk
293            let results: Vec<_> = futures.collect().await;
294            for result in results.into_iter().flatten() {
295                ref_map.insert(result.0, result.1);
296            }
297        }
298
299        ref_map
300    }
301
302    /// Get a property value from a RemoteObject by name.
303    pub(super) async fn get_property_value(
304        &self,
305        object_id: &str,
306        property: &str,
307    ) -> Result<serde_json::Value, PageError> {
308        #[derive(Debug, serde::Deserialize)]
309        struct CallResult {
310            result: viewpoint_cdp::protocol::runtime::RemoteObject,
311        }
312
313        let js_fn = js! {
314            (function() { return this[#{property}]; })
315        };
316        // Strip outer parentheses for CDP functionDeclaration
317        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
318
319        let result: CallResult = self
320            .connection
321            .send_command(
322                "Runtime.callFunctionOn",
323                Some(serde_json::json!({
324                    "objectId": object_id,
325                    "functionDeclaration": function_declaration,
326                    "returnByValue": true
327                })),
328                Some(&self.session_id),
329            )
330            .await?;
331
332        Ok(result.result.value.unwrap_or(serde_json::Value::Null))
333    }
334
335    /// Get a property as a RemoteObject from a RemoteObject by name.
336    pub(super) async fn get_property_object(
337        &self,
338        object_id: &str,
339        property: &str,
340    ) -> Result<Option<String>, PageError> {
341        #[derive(Debug, serde::Deserialize)]
342        struct CallResult {
343            result: viewpoint_cdp::protocol::runtime::RemoteObject,
344        }
345
346        let js_fn = js! {
347            (function() { return this[#{property}]; })
348        };
349        // Strip outer parentheses for CDP functionDeclaration
350        let function_declaration = js_fn.trim_start_matches('(').trim_end_matches(')');
351
352        let result: CallResult = self
353            .connection
354            .send_command(
355                "Runtime.callFunctionOn",
356                Some(serde_json::json!({
357                    "objectId": object_id,
358                    "functionDeclaration": function_declaration,
359                    "returnByValue": false
360                })),
361                Some(&self.session_id),
362            )
363            .await?;
364
365        Ok(result.result.object_id)
366    }
367
368    /// Get the backendNodeId for an element by its object ID.
369    pub(super) async fn describe_node(&self, object_id: &str) -> Result<BackendNodeId, PageError> {
370        let result: DescribeNodeResult = self
371            .connection
372            .send_command(
373                "DOM.describeNode",
374                Some(DescribeNodeParams {
375                    node_id: None,
376                    backend_node_id: None,
377                    object_id: Some(object_id.to_string()),
378                    depth: Some(0),
379                    pierce: None,
380                }),
381                Some(&self.session_id),
382            )
383            .await?;
384
385        Ok(result.node.backend_node_id)
386    }
387
388    /// Release a RemoteObject by its object ID.
389    pub(super) async fn release_object(&self, object_id: &str) -> Result<(), PageError> {
390        let _: serde_json::Value = self
391            .connection
392            .send_command(
393                "Runtime.releaseObject",
394                Some(serde_json::json!({
395                    "objectId": object_id
396                })),
397                Some(&self.session_id),
398            )
399            .await?;
400
401        Ok(())
402    }
403}