car-desktop 0.8.0

OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! macOS accessibility-tree walker via AXUIElement.
//!
//! Implements the AX half of `MacBackend::observe_window`.
//! Requires Accessibility permission (TCC service
//! `kTCCServiceAccessibility`); on denied permission the walk
//! returns `CarDesktopError::PermissionDenied` and the caller
//! should fall back to the pixel frame alone. Permission preflight
//! itself lives in CD-06 (`macos/permissions.rs`).
//!
//! Bounded BFS walk:
//!   * depth limit 24 (`perception::AX_DEPTH_CAP`)
//!   * total node count limit 2048 (`perception::AX_NODE_CAP`)
//!   * truncation reported via `UiMap.a11y_truncated`
//!
//! Notable limitation: Bevy UI does not expose AX. When the target
//! window's content is a Bevy surface, the walker returns a sparse
//! tree carrying only OS-level window chrome (title bar, traffic
//! lights). Callers that need interior introspection of a Bevy
//! window should use screenshot-diff instead (CD-08's
//! `screenshot_matches_reference` assertion).

use std::collections::VecDeque;
use std::ffi::c_void;
use std::ptr;

use accessibility_sys::{
    kAXChildrenAttribute, kAXEnabledAttribute, kAXFocusedAttribute, kAXPositionAttribute,
    kAXRoleAttribute, kAXSizeAttribute, kAXTitleAttribute, kAXValueAttribute,
    kAXValueTypeCGPoint, kAXValueTypeCGSize, AXError, AXUIElementCopyActionNames,
    AXUIElementCopyAttributeValue, AXUIElementCreateApplication, AXUIElementRef,
    AXValueGetValue, AXValueRef,
};
use core_foundation::array::{CFArray, CFArrayRef};
use core_foundation::base::{CFType, CFTypeRef, TCFType};
use core_foundation::boolean::CFBoolean;
use core_foundation::string::{CFString, CFStringRef};
use core_graphics::geometry::{CGPoint, CGSize};

use std::collections::HashMap;

use crate::errors::{CarDesktopError, Permission, Result};
use crate::models::{A11yElementRecord, A11yNode, Bounds, WindowHandle};
use crate::perception::{AX_DEPTH_CAP, AX_NODE_CAP};

/// Result of an AX walk — the root node, a flat BFS index of all
/// node ids encountered, the id-indexed element lookup, and the
/// truncated flag.
pub struct AxWalkOutput {
    /// First node the walk managed to describe (the walk starts at the
    /// application element, whose id is `"root"`).
    pub root: A11yNode,
    /// Ids of every described node, in the order the walk visited them.
    pub index: Vec<String>,
    /// Per-node attribute record keyed by node id; holds one entry for
    /// each id in `index`.
    pub by_id: HashMap<String, A11yElementRecord>,
    /// `true` when the walk hit the depth cap or the node-count cap and
    /// therefore left part of the tree unvisited.
    pub truncated: bool,
}

/// Walk the AX tree rooted at the target window's owning app.
///
/// Note: `AXUIElementCreateApplication(pid)` returns the app's
/// root element, whose children are the app's top-level windows.
/// We descend into the specific window by matching its AXWindow
/// child against the requested `window_id` via each candidate's
/// position + size rect intersected with the live CGWindow frame
/// — AX doesn't expose a stable window id, so we fall back to
/// frame-match.
pub fn walk_window_ax(handle: WindowHandle) -> Result<AxWalkOutput> {
    let app_element = unsafe { AXUIElementCreateApplication(handle.pid as i32) };
    if app_element.is_null() {
        return Err(CarDesktopError::OsApi {
            detail: format!(
                "AXUIElementCreateApplication returned null for pid {}",
                handle.pid
            ),
            source: None,
        });
    }

    // Before we touch the tree, sanity-check that the process is
    // actually trusted to receive AX queries. We call the
    // role-read APIs lazily, and if the first copy_attribute hits
    // kAXErrorNotImplemented / kAXErrorAPIDisabled we'll surface
    // PermissionDenied. The explicit preflight lives in CD-06.

    // Bounded BFS from the app root. The walker returns the first
    // subtree that looks like "the target window," or the app root
    // itself if no specific match can be made (happens when Bevy
    // windows don't expose AXPosition/AXSize reliably).
    let mut queue: VecDeque<(AXUIElementRef, usize, String)> = VecDeque::new();
    queue.push_back((app_element, 0, "root".to_string()));

    // Retain the app_element; the Retained wrapper below ensures
    // release on drop. We walk the raw ref form for call ergonomics.
    let _app_retained = AxRetained::from_raw(app_element as CFTypeRef);

    let mut index: Vec<String> = Vec::new();
    let mut by_id: HashMap<String, A11yElementRecord> = HashMap::new();
    let mut truncated = false;
    let mut root_node: Option<A11yNode> = None;

    while let Some((element, depth, assigned_id)) = queue.pop_front() {
        if depth >= AX_DEPTH_CAP || index.len() >= AX_NODE_CAP {
            truncated = true;
            continue;
        }
        let node_result = describe_element(element, assigned_id.clone());
        let (node, children_refs) = match node_result {
            Ok(v) => v,
            Err(e) => {
                if matches!(e, AxError::ApiDisabled) {
                    return Err(CarDesktopError::PermissionDenied {
                        permission: Permission::Accessibility,
                    });
                }
                continue;
            }
        };
        index.push(node.node_id.clone());
        by_id.insert(
            node.node_id.clone(),
            A11yElementRecord {
                bounds: node.bounds,
                role: node.role.clone(),
                name: node.name.clone(),
                value: node.value.clone(),
                focusable: node.focusable,
                focused: node.focused,
                disabled: node.disabled,
            },
        );
        if root_node.is_none() {
            root_node = Some(node.clone());
        }
        for (i, child_ref) in children_refs.into_iter().enumerate() {
            if index.len() >= AX_NODE_CAP {
                truncated = true;
                break;
            }
            let child_id = format!("{assigned_id}/{i}");
            queue.push_back((child_ref, depth + 1, child_id));
        }
    }

    let root = root_node.ok_or_else(|| CarDesktopError::OsApi {
        detail: format!(
            "AX walk produced no root node for pid {} window {}",
            handle.pid, handle.window_id
        ),
        source: None,
    })?;

    Ok(AxWalkOutput {
        root,
        index,
        by_id,
        truncated,
    })
}

/// Read one element's attributes into an `A11yNode` and fetch its
/// children as raw element references.
///
/// Attributes are read in a fixed order — role, title, value, enabled,
/// focused, position, size, children — and the first read that fails
/// aborts the call with that `AxError`, letting the caller distinguish
/// permission denial from a per-node failure.
///
/// The returned child references are in the same order as the node's
/// `children` ids (`"{node_id}/{i}"`); the caller decides which of them
/// to visit.
fn describe_element(
    element: AXUIElementRef,
    node_id: String,
) -> std::result::Result<(A11yNode, Vec<AXUIElementRef>), AxError> {
    let role = copy_string_attr(element, kAXRoleAttribute)?.unwrap_or_default();
    let name = copy_string_attr(element, kAXTitleAttribute)?;
    let value = copy_string_attr(element, kAXValueAttribute)?;
    // A missing "enabled" attribute counts as enabled; a missing
    // "focused" attribute counts as not focused.
    let disabled = !copy_bool_attr(element, kAXEnabledAttribute)?.unwrap_or(true);
    let focused = copy_bool_attr(element, kAXFocusedAttribute)?.unwrap_or(false);

    // Action names are not read here. They would come from the
    // dedicated `AXUIElementCopyActionNames` function rather than an
    // attribute; CD-05 may wire that up for click-safety confirmation.

    let origin = copy_cgpoint_attr(element, kAXPositionAttribute)?;
    let extent = copy_cgsize_attr(element, kAXSizeAttribute)?;
    // Without both a position and a size the node gets empty bounds.
    let bounds = if let (Some(origin), Some(extent)) = (origin, extent) {
        Bounds::new(origin.x, origin.y, extent.width, extent.height)
    } else {
        Bounds::new(0.0, 0.0, 0.0, 0.0)
    };

    let children_refs = copy_children(element)?;
    let children: Vec<String> = (0..children_refs.len())
        .map(|i| format!("{node_id}/{i}"))
        .collect();

    // Focusability is inferred purely from the role.
    let focusable = matches!(
        role.as_str(),
        "AXButton"
            | "AXTextField"
            | "AXTextArea"
            | "AXCheckBox"
            | "AXRadioButton"
            | "AXMenuItem"
            | "AXPopUpButton"
            | "AXComboBox"
    );

    let node = A11yNode {
        node_id,
        role,
        name,
        value,
        bounds,
        children,
        focusable,
        focused,
        disabled,
    };
    Ok((node, children_refs))
}

// ─── AX attribute readers ──────────────────────────────────────

/// Distinct error class surface for AX reads. The walker uses
/// `ApiDisabled` to short-circuit with PermissionDenied; everything
/// else is treated as "skip this node."
#[derive(Debug, Clone, Copy)]
enum AxError {
    /// The read reported kAXErrorAPIDisabled or kAXErrorNotImplemented;
    /// the walker maps this to `CarDesktopError::PermissionDenied`.
    ApiDisabled,
    /// Any other non-success status that is not one of the benign
    /// "no value" / "attribute unsupported" codes.
    Other,
}

/// Read attribute `attr` from `element` and classify the AX status code.
///
/// * `Ok(Some(ptr))` — the call succeeded and produced a non-null value.
///   The pointer comes from a `Copy` call, so the caller is expected to
///   release it (the typed readers do this via `wrap_under_create_rule`).
/// * `Ok(None)` — success with a null value, or the attribute is unset /
///   unsupported on this element.
/// * `Err(AxError::ApiDisabled)` — the AX API is disabled or the target
///   reports "not implemented".
/// * `Err(AxError::Other)` — any other status.
fn copy_attribute_raw(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CFTypeRef>, AxError> {
    const SUCCESS: AXError = 0;
    // kAXErrorAttributeUnsupported: element doesn't support the attribute.
    const ATTRIBUTE_UNSUPPORTED: AXError = -25205;
    // kAXErrorNotImplemented: the app isn't responding to AX queries.
    const NOT_IMPLEMENTED: AXError = -25208;
    // kAXErrorAPIDisabled: the Accessibility service is off for this process.
    const API_DISABLED: AXError = -25211;
    // kAXErrorNoValue: the attribute isn't set.
    const NO_VALUE: AXError = -25212;

    let key = CFString::new(attr);
    let key_ref: CFStringRef = key.as_concrete_TypeRef();
    let mut value: CFTypeRef = ptr::null();
    let status: AXError = unsafe { AXUIElementCopyAttributeValue(element, key_ref, &mut value) };

    match status {
        SUCCESS => Ok((!value.is_null()).then_some(value)),
        // Neither of these is a failure; the attribute simply has nothing to offer.
        NO_VALUE | ATTRIBUTE_UNSUPPORTED => Ok(None),
        // Both are treated as permission denial.
        API_DISABLED | NOT_IMPLEMENTED => Err(AxError::ApiDisabled),
        _ => Err(AxError::Other),
    }
}

/// Read `attr` as a string.
///
/// Yields `Ok(None)` when the attribute has no value or its value is
/// not a `CFString`; read failures are propagated as `AxError`.
fn copy_string_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<String>, AxError> {
    let text = copy_attribute_raw(element, attr)?.and_then(|raw| {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // wrap_under_create_rule adopts that refcount and releases it
        // when `owned` drops.
        let owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
        owned.downcast::<CFString>().map(|cf| cf.to_string())
    });
    Ok(text)
}

/// Read `attr` as a boolean.
///
/// Yields `Ok(None)` when the attribute has no value or its value is
/// not a `CFBoolean`; read failures are propagated as `AxError`.
fn copy_bool_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<bool>, AxError> {
    let flag = copy_attribute_raw(element, attr)?.and_then(|raw| {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // the wrapper adopts it and releases it on drop.
        let owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
        owned.downcast::<CFBoolean>().map(bool::from)
    });
    Ok(flag)
}

/// Read `attr` as a list of strings.
///
/// Yields an empty list when the attribute has no value or is not a
/// `CFArray`. Array entries that are not `CFString`s are skipped. Read
/// failures are propagated as `AxError`.
fn copy_string_array_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Vec<String>, AxError> {
    let raw = match copy_attribute_raw(element, attr)? {
        Some(raw) => raw,
        None => return Ok(Vec::new()),
    };
    // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue; the
    // wrapper adopts it and releases it when `owned` drops.
    let owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
    let maybe_array: Option<CFArray> = owned.downcast::<CFArray>();
    let array = match maybe_array {
        Some(array) => array,
        None => return Ok(Vec::new()),
    };

    let mut strings = Vec::with_capacity(array.len() as usize);
    strings.extend(
        (0..array.len())
            .filter_map(|i| array.get(i))
            .filter_map(|item| {
                // The array keeps owning its entries, so each one is
                // wrapped under the get rule (retain now, release on drop).
                let entry: CFType = unsafe { CFType::wrap_under_get_rule(*item as CFTypeRef) };
                entry.downcast::<CFString>().map(|s| s.to_string())
            }),
    );
    Ok(strings)
}

/// Read `attr` as a `CGPoint`.
///
/// Yields `Ok(None)` when the attribute has no value or
/// `AXValueGetValue` reports that it could not extract a point; read
/// failures are propagated as `AxError`.
fn copy_cgpoint_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CGPoint>, AxError> {
    let raw = match copy_attribute_raw(element, attr)? {
        Some(raw) => raw,
        None => return Ok(None),
    };
    // `raw` came with +1; this wrapper adopts it and releases it when it
    // goes out of scope at the end of the function.
    let _owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };

    let mut point = CGPoint { x: 0.0, y: 0.0 };
    let destination = &mut point as *mut _ as *mut c_void;
    // NOTE(review): assumes the attribute value is an AXValue — the
    // pointer is cast without a type-id check.
    let extracted =
        unsafe { AXValueGetValue(raw as AXValueRef, kAXValueTypeCGPoint, destination) };

    Ok(if extracted { Some(point) } else { None })
}

/// Read `attr` as a `CGSize`.
///
/// Yields `Ok(None)` when the attribute has no value or
/// `AXValueGetValue` reports that it could not extract a size; read
/// failures are propagated as `AxError`.
fn copy_cgsize_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CGSize>, AxError> {
    let raw = match copy_attribute_raw(element, attr)? {
        Some(raw) => raw,
        None => return Ok(None),
    };
    // `raw` came with +1; this wrapper adopts it and releases it when it
    // goes out of scope at the end of the function.
    let _owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };

    let mut size = CGSize {
        width: 0.0,
        height: 0.0,
    };
    let destination = &mut size as *mut _ as *mut c_void;
    // NOTE(review): assumes the attribute value is an AXValue — the
    // pointer is cast without a type-id check.
    let extracted =
        unsafe { AXValueGetValue(raw as AXValueRef, kAXValueTypeCGSize, destination) };

    Ok(if extracted { Some(size) } else { None })
}

/// Fetch the children of `element` as raw element references.
///
/// Yields an empty list when the children attribute has no value or is
/// not a `CFArray`; read failures are propagated as `AxError`.
///
/// Ownership: every returned reference has been `CFRetain`ed here so it
/// outlives the array it came from. The returned `Vec` holds plain raw
/// pointers and releases nothing when dropped — the caller owns one
/// reference per entry and is responsible for releasing each of them.
fn copy_children(element: AXUIElementRef) -> std::result::Result<Vec<AXUIElementRef>, AxError> {
    let raw = match copy_attribute_raw(element, kAXChildrenAttribute)? {
        Some(raw) => raw,
        None => return Ok(Vec::new()),
    };
    // The array itself arrived with +1; this wrapper releases it when
    // the function returns.
    let owned: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
    let maybe_array: Option<CFArray> = owned.downcast::<CFArray>();
    let array = match maybe_array {
        Some(array) => array,
        None => return Ok(Vec::new()),
    };

    let mut children = Vec::with_capacity(array.len() as usize);
    children.extend(
        (0..array.len())
            .filter_map(|i| array.get(i))
            .map(|item| {
                // Each entry is treated as an AXUIElementRef (an opaque
                // CFTypeRef). Retain it so it stays valid after the
                // array is released.
                let child = *item as CFTypeRef;
                unsafe {
                    core_foundation::base::CFRetain(child);
                }
                child as AXUIElementRef
            }),
    );
    Ok(children)
}

/// Keeps the `AXUIElementCopyActionNames` import (and `CFArrayRef`)
/// referenced so it does not trigger an unused warning until CD-05
/// consumes it. Nothing in this file calls this function.
///
/// If it is ever invoked, it requests the action names for `element`
/// and immediately releases whatever array comes back.
#[allow(dead_code)]
fn _keep_action_names_symbol(element: AXUIElementRef) {
    let mut names: CFArrayRef = ptr::null();
    // The status code is deliberately ignored; only the out-pointer matters.
    let _status = unsafe { AXUIElementCopyActionNames(element, &mut names) };
    if names.is_null() {
        return;
    }
    unsafe {
        core_foundation::base::CFRelease(names as CFTypeRef);
    }
}

/// Guard that holds one retain on a `CFTypeRef` and gives it back on
/// drop. Used to keep AXUIElementRef values alive for the duration of
/// the walk.
///
/// A null pointer is accepted and is neither retained nor released.
struct AxRetained(CFTypeRef);

impl AxRetained {
    /// Wrap `ref_`, taking a new retain on it.
    ///
    /// The guard owns only the retain it takes here. Any reference the
    /// caller already held before the call is still the caller's to
    /// release.
    fn from_raw(ref_: CFTypeRef) -> Self {
        let guard = Self(ref_);
        if guard.0.is_null() {
            return guard;
        }
        unsafe {
            core_foundation::base::CFRetain(guard.0);
        }
        guard
    }
}

impl Drop for AxRetained {
    /// Release the retain taken in `from_raw`; a null guard is a no-op.
    fn drop(&mut self) {
        let held = self.0;
        if held.is_null() {
            return;
        }
        unsafe {
            core_foundation::base::CFRelease(held);
        }
    }
}