viewpoint_core/page/frame/
aria.rs

1//! Frame ARIA accessibility snapshot operations.
2
3use std::collections::HashMap;
4
5use futures::stream::{FuturesUnordered, StreamExt};
6use tracing::{debug, instrument, trace};
7use viewpoint_cdp::protocol::dom::{BackendNodeId, DescribeNodeParams, DescribeNodeResult};
8use viewpoint_cdp::protocol::runtime::EvaluateParams;
9use viewpoint_js::js;
10
11use super::Frame;
12use crate::error::PageError;
13use crate::page::aria_snapshot::{SnapshotOptions, apply_refs_to_snapshot};
14use crate::page::locator::aria_js::aria_snapshot_with_refs_js;
15
16impl Frame {
17    /// Capture an ARIA accessibility snapshot of this frame's document.
18    ///
19    /// The snapshot represents the accessible structure of the frame's content
20    /// as it would be exposed to assistive technologies. This is useful for
21    /// accessibility testing and MCP (Model Context Protocol) integrations.
22    ///
23    /// # Node References
24    ///
25    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
26    /// These refs can be used with `Page::element_from_ref()` or `Page::locator_from_ref()`
27    /// to interact with elements discovered in the snapshot.
28    ///
29    /// # Frame Boundaries
30    ///
31    /// Any iframes within this frame are marked as frame boundaries in the snapshot
32    /// with `is_frame: true`. Their content is NOT traversed (for security reasons).
33    /// To capture multi-frame accessibility trees, use `Page::aria_snapshot_with_frames()`.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if:
38    /// - The frame is detached
39    /// - JavaScript evaluation fails
40    /// - The snapshot cannot be parsed
41    #[instrument(level = "debug", skip(self), fields(frame_id = %self.id))]
42    pub async fn aria_snapshot(&self) -> Result<crate::page::locator::AriaSnapshot, PageError> {
43        self.aria_snapshot_with_options(SnapshotOptions::default())
44            .await
45    }
46
47    /// Capture an ARIA accessibility snapshot with custom options.
48    ///
49    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
50    ///
51    /// # Example
52    ///
53    /// ```no_run
54    /// use viewpoint_core::SnapshotOptions;
55    ///
56    /// # async fn example(frame: &viewpoint_core::Frame) -> Result<(), viewpoint_core::CoreError> {
57    /// // Skip ref resolution for maximum performance
58    /// let options = SnapshotOptions::default().include_refs(false);
59    /// let snapshot = frame.aria_snapshot_with_options(options).await?;
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
64    pub async fn aria_snapshot_with_options(
65        &self,
66        options: SnapshotOptions,
67    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
68        if self.is_detached() {
69            return Err(PageError::EvaluationFailed("Frame is detached".to_string()));
70        }
71
72        // Capture snapshot with element collection for ref resolution
73        self.capture_snapshot_with_refs(options).await
74    }
75
76    /// Internal method to capture a snapshot with refs resolved.
77    ///
78    /// This uses a two-phase approach:
79    /// 1. JS traversal collects the snapshot and element references
80    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
81    ///
82    /// # Performance Optimizations
83    ///
84    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
85    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
86    /// - Configurable concurrency limit to avoid overwhelming the browser
87    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
88    pub(super) async fn capture_snapshot_with_refs(
89        &self,
90        options: SnapshotOptions,
91    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
92        let snapshot_fn = aria_snapshot_with_refs_js();
93
94        // Evaluate the JS function to get snapshot and element array
95        // We return by value for the snapshot, but need remote objects for elements
96        let js_code = js! {
97            (function() {
98                const getSnapshotWithRefs = @{snapshot_fn};
99                return getSnapshotWithRefs(document.body);
100            })()
101        };
102
103        // Get the execution context ID for this frame's main world
104        let context_id = self.main_world_context_id();
105        trace!(context_id = ?context_id, "Using execution context for aria_snapshot()");
106
107        // First, evaluate to get the result as a RemoteObject (not by value)
108        // so we can access the elements array
109        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
110            .connection
111            .send_command(
112                "Runtime.evaluate",
113                Some(EvaluateParams {
114                    expression: js_code,
115                    object_group: Some("viewpoint-snapshot".to_string()),
116                    include_command_line_api: None,
117                    silent: Some(true),
118                    context_id,
119                    return_by_value: Some(false), // Get RemoteObject, not value
120                    await_promise: Some(false),
121                }),
122                Some(&self.session_id),
123            )
124            .await?;
125
126        if let Some(exception) = result.exception_details {
127            return Err(PageError::EvaluationFailed(exception.text));
128        }
129
130        let result_object_id = result.result.object_id.ok_or_else(|| {
131            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
132        })?;
133
134        // Get the snapshot property (by value)
135        let snapshot_value = self
136            .get_property_value(&result_object_id, "snapshot")
137            .await?;
138
139        // Parse the snapshot
140        let mut snapshot: crate::page::locator::AriaSnapshot =
141            serde_json::from_value(snapshot_value).map_err(|e| {
142                PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
143            })?;
144
145        // Only resolve refs if requested
146        if options.get_include_refs() {
147            // Get the elements array as a RemoteObject
148            let elements_result = self
149                .get_property_object(&result_object_id, "elements")
150                .await?;
151
152            if let Some(elements_object_id) = elements_result {
153                // Batch-fetch all array element object IDs using Runtime.getProperties
154                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
155                let element_count = element_object_ids.len();
156
157                debug!(
158                    element_count = element_count,
159                    max_concurrency = options.get_max_concurrency(),
160                    "Resolving element refs in parallel"
161                );
162
163                // Resolve all node IDs in parallel with concurrency limit
164                let ref_map = self
165                    .resolve_node_ids_parallel(element_object_ids, options.get_max_concurrency())
166                    .await;
167
168                debug!(
169                    resolved_count = ref_map.len(),
170                    total_count = element_count,
171                    "Completed parallel ref resolution"
172                );
173
174                // Apply refs to the snapshot tree
175                apply_refs_to_snapshot(&mut snapshot, &ref_map);
176
177                // Release the elements array to free memory
178                let _ = self.release_object(&elements_object_id).await;
179            }
180        }
181
182        // Release the result object
183        let _ = self.release_object(&result_object_id).await;
184
185        Ok(snapshot)
186    }
187
188    /// Batch-fetch all array element object IDs using `Runtime.getProperties`.
189    ///
190    /// This replaces N individual `get_array_element()` calls with a single CDP call,
191    /// significantly reducing round-trips for large arrays.
192    async fn get_all_array_elements(
193        &self,
194        array_object_id: &str,
195    ) -> Result<Vec<(usize, String)>, PageError> {
196        #[derive(Debug, serde::Deserialize)]
197        struct PropertyDescriptor {
198            name: String,
199            value: Option<viewpoint_cdp::protocol::runtime::RemoteObject>,
200        }
201
202        #[derive(Debug, serde::Deserialize)]
203        struct GetPropertiesResult {
204            result: Vec<PropertyDescriptor>,
205        }
206
207        let result: GetPropertiesResult = self
208            .connection
209            .send_command(
210                "Runtime.getProperties",
211                Some(serde_json::json!({
212                    "objectId": array_object_id,
213                    "ownProperties": true,
214                    "generatePreview": false
215                })),
216                Some(&self.session_id),
217            )
218            .await?;
219
220        // Filter to numeric indices and extract object IDs
221        let mut elements: Vec<(usize, String)> = Vec::new();
222
223        for prop in result.result {
224            // Parse numeric indices (array elements)
225            if let Ok(index) = prop.name.parse::<usize>() {
226                if let Some(value) = prop.value {
227                    if let Some(object_id) = value.object_id {
228                        elements.push((index, object_id));
229                    }
230                }
231            }
232        }
233
234        // Sort by index to maintain order
235        elements.sort_by_key(|(index, _)| *index);
236
237        trace!(
238            element_count = elements.len(),
239            "Batch-fetched array elements"
240        );
241
242        Ok(elements)
243    }
244
245    /// Resolve node IDs in parallel with a concurrency limit.
246    ///
247    /// Uses chunked processing with `FuturesUnordered` to limit concurrency
248    /// and avoid overwhelming the browser's CDP connection.
249    async fn resolve_node_ids_parallel(
250        &self,
251        element_object_ids: Vec<(usize, String)>,
252        max_concurrency: usize,
253    ) -> HashMap<usize, BackendNodeId> {
254        let mut ref_map = HashMap::new();
255
256        // Process in chunks to limit concurrency
257        for chunk in element_object_ids.chunks(max_concurrency) {
258            let futures: FuturesUnordered<_> = chunk
259                .iter()
260                .map(|(index, object_id)| {
261                    let index = *index;
262                    let object_id = object_id.clone();
263                    async move {
264                        match self.describe_node(&object_id).await {
265                            Ok(backend_node_id) => {
266                                trace!(
267                                    index = index,
268                                    backend_node_id = backend_node_id,
269                                    "Resolved element ref"
270                                );
271                                Some((index, backend_node_id))
272                            }
273                            Err(e) => {
274                                debug!(index = index, error = %e, "Failed to get backendNodeId for element");
275                                None
276                            }
277                        }
278                    }
279                })
280                .collect();
281
282            // Collect all results from this chunk
283            let results: Vec<_> = futures.collect().await;
284            for result in results.into_iter().flatten() {
285                ref_map.insert(result.0, result.1);
286            }
287        }
288
289        ref_map
290    }
291
292    /// Get a property value from a RemoteObject by name.
293    pub(super) async fn get_property_value(
294        &self,
295        object_id: &str,
296        property: &str,
297    ) -> Result<serde_json::Value, PageError> {
298        #[derive(Debug, serde::Deserialize)]
299        struct CallResult {
300            result: viewpoint_cdp::protocol::runtime::RemoteObject,
301        }
302
303        let result: CallResult = self
304            .connection
305            .send_command(
306                "Runtime.callFunctionOn",
307                Some(serde_json::json!({
308                    "objectId": object_id,
309                    "functionDeclaration": format!("function() {{ return this.{}; }}", property),
310                    "returnByValue": true
311                })),
312                Some(&self.session_id),
313            )
314            .await?;
315
316        Ok(result.result.value.unwrap_or(serde_json::Value::Null))
317    }
318
319    /// Get a property as a RemoteObject from a RemoteObject by name.
320    pub(super) async fn get_property_object(
321        &self,
322        object_id: &str,
323        property: &str,
324    ) -> Result<Option<String>, PageError> {
325        #[derive(Debug, serde::Deserialize)]
326        struct CallResult {
327            result: viewpoint_cdp::protocol::runtime::RemoteObject,
328        }
329
330        let result: CallResult = self
331            .connection
332            .send_command(
333                "Runtime.callFunctionOn",
334                Some(serde_json::json!({
335                    "objectId": object_id,
336                    "functionDeclaration": format!("function() {{ return this.{}; }}", property),
337                    "returnByValue": false
338                })),
339                Some(&self.session_id),
340            )
341            .await?;
342
343        Ok(result.result.object_id)
344    }
345
346    /// Get the backendNodeId for an element by its object ID.
347    pub(super) async fn describe_node(&self, object_id: &str) -> Result<BackendNodeId, PageError> {
348        let result: DescribeNodeResult = self
349            .connection
350            .send_command(
351                "DOM.describeNode",
352                Some(DescribeNodeParams {
353                    node_id: None,
354                    backend_node_id: None,
355                    object_id: Some(object_id.to_string()),
356                    depth: Some(0),
357                    pierce: None,
358                }),
359                Some(&self.session_id),
360            )
361            .await?;
362
363        Ok(result.node.backend_node_id)
364    }
365
366    /// Release a RemoteObject by its object ID.
367    pub(super) async fn release_object(&self, object_id: &str) -> Result<(), PageError> {
368        let _: serde_json::Value = self
369            .connection
370            .send_command(
371                "Runtime.releaseObject",
372                Some(serde_json::json!({
373                    "objectId": object_id
374                })),
375                Some(&self.session_id),
376            )
377            .await?;
378
379        Ok(())
380    }
381}