car-desktop 0.14.0

OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
Documentation
//! macOS accessibility-tree walker via AXUIElement.
//!
//! Implements the AX half of `MacBackend::observe_window`.
//! Requires Accessibility permission (TCC service
//! `kTCCServiceAccessibility`); on denied permission the walk
//! returns `CarDesktopError::PermissionDenied` and the caller
//! should fall back to the pixel frame alone. Permission preflight
//! itself lives in CD-06 (`macos/permissions.rs`).
//!
//! Bounded BFS walk:
//!   * depth limit 24 (`perception::AX_DEPTH_CAP`)
//!   * total node count limit 2048 (`perception::AX_NODE_CAP`)
//!   * truncation reported via `UiMap.a11y_truncated`
//!
//! Notable limitation: Bevy UI does not expose AX. When the target
//! window's content is a Bevy surface, the walker returns a sparse
//! tree carrying only OS-level window chrome (title bar, traffic
//! lights). Callers that need interior introspection of a Bevy
//! window should use screenshot-diff instead (CD-08's
//! `screenshot_matches_reference` assertion).

use std::collections::VecDeque;
use std::ffi::c_void;
use std::ptr;

use accessibility_sys::{
    kAXChildrenAttribute, kAXEnabledAttribute, kAXFocusedAttribute, kAXPositionAttribute,
    kAXRoleAttribute, kAXSizeAttribute, kAXTitleAttribute, kAXValueAttribute, kAXValueTypeCGPoint,
    kAXValueTypeCGSize, AXError, AXUIElementCopyActionNames, AXUIElementCopyAttributeValue,
    AXUIElementCreateApplication, AXUIElementRef, AXValueGetValue, AXValueRef,
};
use core_foundation::array::{CFArray, CFArrayRef};
use core_foundation::base::{CFType, CFTypeRef, TCFType};
use core_foundation::boolean::CFBoolean;
use core_foundation::string::{CFString, CFStringRef};
use core_graphics::geometry::{CGPoint, CGSize};

use std::collections::HashMap;

use crate::errors::{CarDesktopError, Permission, Result};
use crate::models::{A11yElementRecord, A11yNode, Bounds, WindowHandle};
use crate::perception::{AX_DEPTH_CAP, AX_NODE_CAP};

/// Result of an AX walk — the root node, a flat BFS index of all
/// node ids encountered, the id-indexed element lookup, and the
/// truncated flag.
pub struct AxWalkOutput {
    /// The first node the walk managed to describe. Its `children`
    /// field holds child node ids, not nested nodes.
    pub root: A11yNode,
    /// Node ids in the order the walk visited them (breadth-first).
    pub index: Vec<String>,
    /// Per-node element record, keyed by the same ids stored in `index`.
    pub by_id: HashMap<String, A11yElementRecord>,
    /// `true` when the walk hit the depth cap or the node-count cap and
    /// therefore left part of the tree unvisited.
    pub truncated: bool,
}

/// Walk the AX tree rooted at the target window's owning app.
///
/// Note: `AXUIElementCreateApplication(pid)` returns the app's
/// root element, whose children are the app's top-level windows.
/// We descend into the specific window by matching its AXWindow
/// child against the requested `window_id` via each candidate's
/// position + size rect intersected with the live CGWindow frame
/// — AX doesn't expose a stable window id, so we fall back to
/// frame-match.
pub fn walk_window_ax(handle: WindowHandle) -> Result<AxWalkOutput> {
    let app_element = unsafe { AXUIElementCreateApplication(handle.pid as i32) };
    if app_element.is_null() {
        return Err(CarDesktopError::OsApi {
            detail: format!(
                "AXUIElementCreateApplication returned null for pid {}",
                handle.pid
            ),
            source: None,
        });
    }

    // Before we touch the tree, sanity-check that the process is
    // actually trusted to receive AX queries. We call the
    // role-read APIs lazily, and if the first copy_attribute hits
    // kAXErrorNotImplemented / kAXErrorAPIDisabled we'll surface
    // PermissionDenied. The explicit preflight lives in CD-06.

    // Bounded BFS from the app root. The walker returns the first
    // subtree that looks like "the target window," or the app root
    // itself if no specific match can be made (happens when Bevy
    // windows don't expose AXPosition/AXSize reliably).
    let mut queue: VecDeque<(AXUIElementRef, usize, String)> = VecDeque::new();
    queue.push_back((app_element, 0, "root".to_string()));

    // Retain the app_element; the Retained wrapper below ensures
    // release on drop. We walk the raw ref form for call ergonomics.
    let _app_retained = AxRetained::from_raw(app_element as CFTypeRef);

    let mut index: Vec<String> = Vec::new();
    let mut by_id: HashMap<String, A11yElementRecord> = HashMap::new();
    let mut truncated = false;
    let mut root_node: Option<A11yNode> = None;

    while let Some((element, depth, assigned_id)) = queue.pop_front() {
        if depth >= AX_DEPTH_CAP || index.len() >= AX_NODE_CAP {
            truncated = true;
            continue;
        }
        let node_result = describe_element(element, assigned_id.clone());
        let (node, children_refs) = match node_result {
            Ok(v) => v,
            Err(e) => {
                if matches!(e, AxError::ApiDisabled) {
                    return Err(CarDesktopError::PermissionDenied {
                        permission: Permission::Accessibility,
                    });
                }
                continue;
            }
        };
        index.push(node.node_id.clone());
        by_id.insert(
            node.node_id.clone(),
            A11yElementRecord {
                bounds: node.bounds,
                role: node.role.clone(),
                name: node.name.clone(),
                value: node.value.clone(),
                focusable: node.focusable,
                focused: node.focused,
                disabled: node.disabled,
            },
        );
        if root_node.is_none() {
            root_node = Some(node.clone());
        }
        for (i, child_ref) in children_refs.into_iter().enumerate() {
            if index.len() >= AX_NODE_CAP {
                truncated = true;
                break;
            }
            let child_id = format!("{assigned_id}/{i}");
            queue.push_back((child_ref, depth + 1, child_id));
        }
    }

    let root = root_node.ok_or_else(|| CarDesktopError::OsApi {
        detail: format!(
            "AX walk produced no root node for pid {} window {}",
            handle.pid, handle.window_id
        ),
        source: None,
    })?;

    Ok(AxWalkOutput {
        root,
        index,
        by_id,
        truncated,
    })
}

/// Read one AX element into an `A11yNode` and hand back its children
/// as raw element refs for the caller to enqueue.
///
/// Child ids are derived from `node_id` as `"{node_id}/{i}"`, where
/// `i` is the child's position in the returned vec. Any attribute
/// read that fails propagates its `AxError`, so the caller can tell
/// a permission denial apart from a per-node failure.
fn describe_element(
    element: AXUIElementRef,
    node_id: String,
) -> std::result::Result<(A11yNode, Vec<AXUIElementRef>), AxError> {
    // Roles reported as focusable.
    const FOCUSABLE_ROLES: [&str; 8] = [
        "AXButton",
        "AXTextField",
        "AXTextArea",
        "AXCheckBox",
        "AXRadioButton",
        "AXMenuItem",
        "AXPopUpButton",
        "AXComboBox",
    ];

    let role = copy_string_attr(element, kAXRoleAttribute)?.unwrap_or_default();
    let name = copy_string_attr(element, kAXTitleAttribute)?;
    let value = copy_string_attr(element, kAXValueAttribute)?;
    // A missing "enabled" attribute counts as enabled; a missing
    // "focused" attribute counts as not focused.
    let disabled = !copy_bool_attr(element, kAXEnabledAttribute)?.unwrap_or(true);
    let focused = copy_bool_attr(element, kAXFocusedAttribute)?.unwrap_or(false);

    // Action names are not attributes (they come from
    // `AXUIElementCopyActionNames`) and are not queried here.
    let origin = copy_cgpoint_attr(element, kAXPositionAttribute)?;
    let extent = copy_cgsize_attr(element, kAXSizeAttribute)?;
    // Without both position and size the node gets an all-zero rect.
    let bounds = if let (Some(o), Some(e)) = (origin, extent) {
        Bounds::new(o.x, o.y, e.width, e.height)
    } else {
        Bounds::new(0.0, 0.0, 0.0, 0.0)
    };

    let children_refs = copy_children(element)?;
    let children: Vec<String> = (0..children_refs.len())
        .map(|i| format!("{node_id}/{i}"))
        .collect();
    let focusable = FOCUSABLE_ROLES.contains(&role.as_str());

    let node = A11yNode {
        node_id,
        role,
        name,
        value,
        bounds,
        children,
        focusable,
        focused,
        disabled,
    };
    Ok((node, children_refs))
}

// ─── AX attribute readers ──────────────────────────────────────

/// Distinct error class surface for AX reads. The walker uses
/// `ApiDisabled` to short-circuit with PermissionDenied; everything
/// else is treated as "skip this node."
#[derive(Debug, Clone, Copy)]
enum AxError {
    /// The read reported `kAXErrorAPIDisabled` or
    /// `kAXErrorNotImplemented`; handled as a permission denial.
    ApiDisabled,
    /// Any other non-success AX status that is not one of the benign
    /// "no value" / "attribute unsupported" codes.
    Other,
}

/// Fetch attribute `attr` from `element` as an untyped CF reference.
///
/// Returns `Ok(Some(ptr))` with a +1 reference the caller must
/// release, `Ok(None)` when the attribute is absent or unsupported
/// (or the call succeeded with a null value), and `Err` for every
/// other AX status.
fn copy_attribute_raw(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CFTypeRef>, AxError> {
    // AX status codes this reader distinguishes.
    const AX_SUCCESS: AXError = 0;
    // kAXErrorAttributeUnsupported: the element lacks this attribute.
    const AX_ATTRIBUTE_UNSUPPORTED: AXError = -25205;
    // kAXErrorNotImplemented: the app is not answering AX queries.
    const AX_NOT_IMPLEMENTED: AXError = -25208;
    // kAXErrorAPIDisabled: Accessibility is off for this process.
    const AX_API_DISABLED: AXError = -25211;
    // kAXErrorNoValue: the attribute exists but is not set.
    const AX_NO_VALUE: AXError = -25212;

    let key = CFString::new(attr);
    let mut value: CFTypeRef = ptr::null();
    // SAFETY: `key` outlives the call and `value` is a valid out-pointer.
    let status: AXError =
        unsafe { AXUIElementCopyAttributeValue(element, key.as_concrete_TypeRef(), &mut value) };

    match status {
        AX_SUCCESS => Ok((!value.is_null()).then_some(value)),
        // Neither of these is a failure; the attribute just has nothing to report.
        AX_NO_VALUE | AX_ATTRIBUTE_UNSUPPORTED => Ok(None),
        // Both are routed to the permission-denied path by the walker.
        AX_API_DISABLED | AX_NOT_IMPLEMENTED => Err(AxError::ApiDisabled),
        _ => Err(AxError::Other),
    }
}

/// Read attribute `attr` as a string.
///
/// Yields `Ok(None)` when the attribute has no value or its value is
/// not a `CFString`.
fn copy_string_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<String>, AxError> {
    let owned: CFType = match copy_attribute_raw(element, attr)? {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // the wrapper adopts that reference and releases it on drop.
        Some(raw) => unsafe { CFType::wrap_under_create_rule(raw) },
        None => return Ok(None),
    };
    Ok(owned.downcast::<CFString>().map(|text| text.to_string()))
}

/// Read attribute `attr` as a boolean.
///
/// Yields `Ok(None)` when the attribute has no value or its value is
/// not a `CFBoolean`.
fn copy_bool_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<bool>, AxError> {
    let owned: CFType = match copy_attribute_raw(element, attr)? {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // the wrapper adopts that reference and releases it on drop.
        Some(raw) => unsafe { CFType::wrap_under_create_rule(raw) },
        None => return Ok(None),
    };
    Ok(owned.downcast::<CFBoolean>().map(Into::into))
}

/// Read attribute `attr` as a list of strings.
///
/// A missing attribute or a value that is not a `CFArray` gives an
/// empty vec; array entries that are not `CFString`s are skipped.
fn copy_string_array_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Vec<String>, AxError> {
    let Some(raw) = copy_attribute_raw(element, attr)? else {
        return Ok(Vec::new());
    };
    // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue; the
    // wrapper adopts that reference and releases it on drop.
    let holder: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
    let Some(entries): Option<CFArray> = holder.downcast::<CFArray>() else {
        return Ok(Vec::new());
    };

    let mut strings = Vec::with_capacity(entries.len() as usize);
    let as_strings = (0..entries.len())
        .filter_map(|i| entries.get(i).map(|entry| *entry as CFTypeRef))
        .filter_map(|entry_ref| {
            // SAFETY: the array keeps its entries alive; the Get rule
            // takes an extra retain that is released when `entry` drops.
            let entry: CFType = unsafe { CFType::wrap_under_get_rule(entry_ref) };
            entry.downcast::<CFString>().map(|text| text.to_string())
        });
    strings.extend(as_strings);
    Ok(strings)
}

/// Read attribute `attr` as a `CGPoint`.
///
/// Yields `Ok(None)` when the attribute has no value or
/// `AXValueGetValue` cannot extract a point from it.
fn copy_cgpoint_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CGPoint>, AxError> {
    let holder: CFType = match copy_attribute_raw(element, attr)? {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // the wrapper adopts it and releases it when this fn returns.
        Some(raw) => unsafe { CFType::wrap_under_create_rule(raw) },
        None => return Ok(None),
    };

    let mut out = CGPoint { x: 0.0, y: 0.0 };
    // NOTE(review): the value is assumed to be an AXValue; its CF type
    // id is not checked before the cast.
    let extracted = unsafe {
        AXValueGetValue(
            holder.as_CFTypeRef() as AXValueRef,
            kAXValueTypeCGPoint,
            &mut out as *mut _ as *mut c_void,
        )
    };
    Ok(extracted.then_some(out))
}

/// Read attribute `attr` as a `CGSize`.
///
/// Yields `Ok(None)` when the attribute has no value or
/// `AXValueGetValue` cannot extract a size from it.
fn copy_cgsize_attr(
    element: AXUIElementRef,
    attr: &str,
) -> std::result::Result<Option<CGSize>, AxError> {
    let holder: CFType = match copy_attribute_raw(element, attr)? {
        // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue;
        // the wrapper adopts it and releases it when this fn returns.
        Some(raw) => unsafe { CFType::wrap_under_create_rule(raw) },
        None => return Ok(None),
    };

    let mut out = CGSize {
        width: 0.0,
        height: 0.0,
    };
    // NOTE(review): the value is assumed to be an AXValue; its CF type
    // id is not checked before the cast.
    let extracted = unsafe {
        AXValueGetValue(
            holder.as_CFTypeRef() as AXValueRef,
            kAXValueTypeCGSize,
            &mut out as *mut _ as *mut c_void,
        )
    };
    Ok(extracted.then_some(out))
}

/// Read the element's children as raw `AXUIElementRef`s.
///
/// Ownership: every returned ref has been `CFRetain`ed here, so the
/// caller owns one reference per entry and is responsible for
/// releasing each of them. A `Vec` of raw pointers releases nothing
/// when it is dropped.
///
/// A missing attribute or a value that is not a `CFArray` gives an
/// empty vec.
fn copy_children(element: AXUIElementRef) -> std::result::Result<Vec<AXUIElementRef>, AxError> {
    let Some(raw) = copy_attribute_raw(element, kAXChildrenAttribute)? else {
        return Ok(Vec::new());
    };
    // SAFETY: `raw` carries +1 from AXUIElementCopyAttributeValue; the
    // wrapper adopts that reference and releases it on drop.
    let holder: CFType = unsafe { CFType::wrap_under_create_rule(raw) };
    let Some(kids): Option<CFArray> = holder.downcast::<CFArray>() else {
        return Ok(Vec::new());
    };

    let retained: Vec<AXUIElementRef> = (0..kids.len())
        .filter_map(|i| kids.get(i).map(|entry| *entry as CFTypeRef))
        .map(|child| {
            // The array only lends its entries; retain each one so it
            // stays valid after the array is released.
            unsafe {
                core_foundation::base::CFRetain(child);
            }
            child as AXUIElementRef
        })
        .collect();
    Ok(retained)
}

/// Keeps `AXUIElementCopyActionNames` (and `CFArrayRef`) referenced
/// so their imports do not warn as unused until CD-05 consumes them.
///
/// If the call produces an array it is released straight away.
#[allow(dead_code)]
fn _keep_action_names_symbol(element: AXUIElementRef) {
    let mut names: CFArrayRef = ptr::null();
    // The status code is deliberately ignored.
    let _status = unsafe { AXUIElementCopyActionNames(element, &mut names) };
    if names.is_null() {
        return;
    }
    // SAFETY: `names` is non-null and was returned by a Copy function,
    // so this releases the reference we own.
    unsafe {
        core_foundation::base::CFRelease(names as CFTypeRef);
    }
}

/// Owning wrapper that releases a CFTypeRef on drop. Used to keep
/// intermediate AXUIElementRef values alive for the duration of
/// the walk without leaking refcounts.
///
/// Note that `from_raw` takes its own retain, so the wrapper is
/// refcount-neutral: it does not adopt a reference the caller
/// already owns. A null pointer is stored as-is and never released.
struct AxRetained(CFTypeRef);

impl AxRetained {
    /// Wrap `ref_`, taking an additional retain on it.
    ///
    /// A null pointer is wrapped without being retained. Because the
    /// retain is taken here, any reference the caller already holds
    /// remains the caller's to release.
    fn from_raw(ref_: CFTypeRef) -> Self {
        if ref_.is_null() {
            return Self(ref_);
        }
        // SAFETY: `ref_` is non-null; the caller passes a live CF object.
        unsafe {
            core_foundation::base::CFRetain(ref_);
        }
        Self(ref_)
    }
}

impl Drop for AxRetained {
    /// Release the wrapped reference; a null pointer is left alone.
    fn drop(&mut self) {
        let held = self.0;
        if held.is_null() {
            return;
        }
        // SAFETY: `held` is non-null and this wrapper holds one reference to it.
        unsafe {
            core_foundation::base::CFRelease(held);
        }
    }
}