viewpoint_core/page/frame/
aria.rs

1//! Frame ARIA accessibility snapshot operations.
2
3use std::collections::HashMap;
4
5use futures::stream::{FuturesUnordered, StreamExt};
6use tracing::{debug, instrument, trace};
7use viewpoint_cdp::protocol::dom::{BackendNodeId, DescribeNodeParams, DescribeNodeResult};
8use viewpoint_cdp::protocol::runtime::EvaluateParams;
9use viewpoint_js::js;
10
11use super::Frame;
12use crate::error::PageError;
13use crate::page::aria_snapshot::{apply_refs_to_snapshot, SnapshotOptions};
14use crate::page::locator::aria_js::aria_snapshot_with_refs_js;
15
16impl Frame {
17    /// Capture an ARIA accessibility snapshot of this frame's document.
18    ///
19    /// The snapshot represents the accessible structure of the frame's content
20    /// as it would be exposed to assistive technologies. This is useful for
21    /// accessibility testing and MCP (Model Context Protocol) integrations.
22    ///
23    /// # Node References
24    ///
25    /// The snapshot includes `node_ref` on each element (format: `e{backendNodeId}`).
26    /// These refs can be used with `Page::element_from_ref()` or `Page::locator_from_ref()`
27    /// to interact with elements discovered in the snapshot.
28    ///
29    /// # Frame Boundaries
30    ///
31    /// Any iframes within this frame are marked as frame boundaries in the snapshot
32    /// with `is_frame: true`. Their content is NOT traversed (for security reasons).
33    /// To capture multi-frame accessibility trees, use `Page::aria_snapshot_with_frames()`.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if:
38    /// - The frame is detached
39    /// - JavaScript evaluation fails
40    /// - The snapshot cannot be parsed
41    #[instrument(level = "debug", skip(self), fields(frame_id = %self.id))]
42    pub async fn aria_snapshot(&self) -> Result<crate::page::locator::AriaSnapshot, PageError> {
43        self.aria_snapshot_with_options(SnapshotOptions::default())
44            .await
45    }
46
47    /// Capture an ARIA accessibility snapshot with custom options.
48    ///
49    /// See [`aria_snapshot`](Self::aria_snapshot) for details.
50    ///
51    /// # Example
52    ///
53    /// ```no_run
54    /// use viewpoint_core::SnapshotOptions;
55    ///
56    /// # async fn example(frame: &viewpoint_core::Frame) -> Result<(), viewpoint_core::CoreError> {
57    /// // Skip ref resolution for maximum performance
58    /// let options = SnapshotOptions::default().include_refs(false);
59    /// let snapshot = frame.aria_snapshot_with_options(options).await?;
60    /// # Ok(())
61    /// # }
62    /// ```
63    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
64    pub async fn aria_snapshot_with_options(
65        &self,
66        options: SnapshotOptions,
67    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
68        if self.is_detached() {
69            return Err(PageError::EvaluationFailed("Frame is detached".to_string()));
70        }
71
72        // Capture snapshot with element collection for ref resolution
73        self.capture_snapshot_with_refs(options).await
74    }
75
76    /// Internal method to capture a snapshot with refs resolved.
77    ///
78    /// This uses a two-phase approach:
79    /// 1. JS traversal collects the snapshot and element references
80    /// 2. CDP calls resolve each element to its backendNodeId (in parallel)
81    ///
82    /// # Performance Optimizations
83    ///
84    /// - Uses `Runtime.getProperties` to batch-fetch all array element object IDs
85    /// - Uses `FuturesUnordered` to resolve node IDs in parallel
86    /// - Configurable concurrency limit to avoid overwhelming the browser
87    #[instrument(level = "debug", skip(self, options), fields(frame_id = %self.id))]
88    pub(super) async fn capture_snapshot_with_refs(
89        &self,
90        options: SnapshotOptions,
91    ) -> Result<crate::page::locator::AriaSnapshot, PageError> {
92        let snapshot_fn = aria_snapshot_with_refs_js();
93
94        // Evaluate the JS function to get snapshot and element array
95        // We return by value for the snapshot, but need remote objects for elements
96        let js_code = js! {
97            (function() {
98                const getSnapshotWithRefs = @{snapshot_fn};
99                return getSnapshotWithRefs(document.body);
100            })()
101        };
102
103        // Get the execution context ID for this frame's main world
104        let context_id = self.main_world_context_id();
105        trace!(context_id = ?context_id, "Using execution context for aria_snapshot()");
106
107        // First, evaluate to get the result as a RemoteObject (not by value)
108        // so we can access the elements array
109        let result: viewpoint_cdp::protocol::runtime::EvaluateResult = self
110            .connection
111            .send_command(
112                "Runtime.evaluate",
113                Some(EvaluateParams {
114                    expression: js_code,
115                    object_group: Some("viewpoint-snapshot".to_string()),
116                    include_command_line_api: None,
117                    silent: Some(true),
118                    context_id,
119                    return_by_value: Some(false), // Get RemoteObject, not value
120                    await_promise: Some(false),
121                }),
122                Some(&self.session_id),
123            )
124            .await?;
125
126        if let Some(exception) = result.exception_details {
127            return Err(PageError::EvaluationFailed(exception.text));
128        }
129
130        let result_object_id = result.result.object_id.ok_or_else(|| {
131            PageError::EvaluationFailed("No object ID from snapshot evaluation".to_string())
132        })?;
133
134        // Get the snapshot property (by value)
135        let snapshot_value = self
136            .get_property_value(&result_object_id, "snapshot")
137            .await?;
138
139        // Parse the snapshot
140        let mut snapshot: crate::page::locator::AriaSnapshot =
141            serde_json::from_value(snapshot_value).map_err(|e| {
142                PageError::EvaluationFailed(format!("Failed to parse aria snapshot: {e}"))
143            })?;
144
145        // Only resolve refs if requested
146        if options.get_include_refs() {
147            // Get the elements array as a RemoteObject
148            let elements_result = self
149                .get_property_object(&result_object_id, "elements")
150                .await?;
151
152            if let Some(elements_object_id) = elements_result {
153                // Batch-fetch all array element object IDs using Runtime.getProperties
154                let element_object_ids = self.get_all_array_elements(&elements_object_id).await?;
155                let element_count = element_object_ids.len();
156
157                debug!(
158                    element_count = element_count,
159                    max_concurrency = options.get_max_concurrency(),
160                    "Resolving element refs in parallel"
161                );
162
163                // Resolve all node IDs in parallel with concurrency limit
164                let ref_map = self
165                    .resolve_node_ids_parallel(element_object_ids, options.get_max_concurrency())
166                    .await;
167
168                debug!(
169                    resolved_count = ref_map.len(),
170                    total_count = element_count,
171                    "Completed parallel ref resolution"
172                );
173
174                // Apply refs to the snapshot tree
175                apply_refs_to_snapshot(&mut snapshot, &ref_map);
176
177                // Release the elements array to free memory
178                let _ = self.release_object(&elements_object_id).await;
179            }
180        }
181
182        // Release the result object
183        let _ = self.release_object(&result_object_id).await;
184
185        Ok(snapshot)
186    }
187
188    /// Batch-fetch all array element object IDs using `Runtime.getProperties`.
189    ///
190    /// This replaces N individual `get_array_element()` calls with a single CDP call,
191    /// significantly reducing round-trips for large arrays.
192    async fn get_all_array_elements(
193        &self,
194        array_object_id: &str,
195    ) -> Result<Vec<(usize, String)>, PageError> {
196        #[derive(Debug, serde::Deserialize)]
197        struct PropertyDescriptor {
198            name: String,
199            value: Option<viewpoint_cdp::protocol::runtime::RemoteObject>,
200        }
201
202        #[derive(Debug, serde::Deserialize)]
203        struct GetPropertiesResult {
204            result: Vec<PropertyDescriptor>,
205        }
206
207        let result: GetPropertiesResult = self
208            .connection
209            .send_command(
210                "Runtime.getProperties",
211                Some(serde_json::json!({
212                    "objectId": array_object_id,
213                    "ownProperties": true,
214                    "generatePreview": false
215                })),
216                Some(&self.session_id),
217            )
218            .await?;
219
220        // Filter to numeric indices and extract object IDs
221        let mut elements: Vec<(usize, String)> = Vec::new();
222
223        for prop in result.result {
224            // Parse numeric indices (array elements)
225            if let Ok(index) = prop.name.parse::<usize>() {
226                if let Some(value) = prop.value {
227                    if let Some(object_id) = value.object_id {
228                        elements.push((index, object_id));
229                    }
230                }
231            }
232        }
233
234        // Sort by index to maintain order
235        elements.sort_by_key(|(index, _)| *index);
236
237        trace!(element_count = elements.len(), "Batch-fetched array elements");
238
239        Ok(elements)
240    }
241
242    /// Resolve node IDs in parallel with a concurrency limit.
243    ///
244    /// Uses chunked processing with `FuturesUnordered` to limit concurrency
245    /// and avoid overwhelming the browser's CDP connection.
246    async fn resolve_node_ids_parallel(
247        &self,
248        element_object_ids: Vec<(usize, String)>,
249        max_concurrency: usize,
250    ) -> HashMap<usize, BackendNodeId> {
251        let mut ref_map = HashMap::new();
252
253        // Process in chunks to limit concurrency
254        for chunk in element_object_ids.chunks(max_concurrency) {
255            let futures: FuturesUnordered<_> = chunk
256                .iter()
257                .map(|(index, object_id)| {
258                    let index = *index;
259                    let object_id = object_id.clone();
260                    async move {
261                        match self.describe_node(&object_id).await {
262                            Ok(backend_node_id) => {
263                                trace!(
264                                    index = index,
265                                    backend_node_id = backend_node_id,
266                                    "Resolved element ref"
267                                );
268                                Some((index, backend_node_id))
269                            }
270                            Err(e) => {
271                                debug!(index = index, error = %e, "Failed to get backendNodeId for element");
272                                None
273                            }
274                        }
275                    }
276                })
277                .collect();
278
279            // Collect all results from this chunk
280            let results: Vec<_> = futures.collect().await;
281            for result in results.into_iter().flatten() {
282                ref_map.insert(result.0, result.1);
283            }
284        }
285
286        ref_map
287    }
288
289    /// Get a property value from a RemoteObject by name.
290    pub(super) async fn get_property_value(
291        &self,
292        object_id: &str,
293        property: &str,
294    ) -> Result<serde_json::Value, PageError> {
295        #[derive(Debug, serde::Deserialize)]
296        struct CallResult {
297            result: viewpoint_cdp::protocol::runtime::RemoteObject,
298        }
299
300        let result: CallResult = self
301            .connection
302            .send_command(
303                "Runtime.callFunctionOn",
304                Some(serde_json::json!({
305                    "objectId": object_id,
306                    "functionDeclaration": format!("function() {{ return this.{}; }}", property),
307                    "returnByValue": true
308                })),
309                Some(&self.session_id),
310            )
311            .await?;
312
313        Ok(result.result.value.unwrap_or(serde_json::Value::Null))
314    }
315
316    /// Get a property as a RemoteObject from a RemoteObject by name.
317    pub(super) async fn get_property_object(
318        &self,
319        object_id: &str,
320        property: &str,
321    ) -> Result<Option<String>, PageError> {
322        #[derive(Debug, serde::Deserialize)]
323        struct CallResult {
324            result: viewpoint_cdp::protocol::runtime::RemoteObject,
325        }
326
327        let result: CallResult = self
328            .connection
329            .send_command(
330                "Runtime.callFunctionOn",
331                Some(serde_json::json!({
332                    "objectId": object_id,
333                    "functionDeclaration": format!("function() {{ return this.{}; }}", property),
334                    "returnByValue": false
335                })),
336                Some(&self.session_id),
337            )
338            .await?;
339
340        Ok(result.result.object_id)
341    }
342
343    /// Get the backendNodeId for an element by its object ID.
344    pub(super) async fn describe_node(&self, object_id: &str) -> Result<BackendNodeId, PageError> {
345        let result: DescribeNodeResult = self
346            .connection
347            .send_command(
348                "DOM.describeNode",
349                Some(DescribeNodeParams {
350                    node_id: None,
351                    backend_node_id: None,
352                    object_id: Some(object_id.to_string()),
353                    depth: Some(0),
354                    pierce: None,
355                }),
356                Some(&self.session_id),
357            )
358            .await?;
359
360        Ok(result.node.backend_node_id)
361    }
362
363    /// Release a RemoteObject by its object ID.
364    pub(super) async fn release_object(&self, object_id: &str) -> Result<(), PageError> {
365        let _: serde_json::Value = self
366            .connection
367            .send_command(
368                "Runtime.releaseObject",
369                Some(serde_json::json!({
370                    "objectId": object_id
371                })),
372                Some(&self.session_id),
373            )
374            .await?;
375
376        Ok(())
377    }
378}