car-desktop 0.15.1

OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
Documentation
//! macOS backend for `car-desktop`.
//!
//! Implementation lands incrementally across CD-02 through CD-06
//! (see docs/CAR_DESKTOP.md). Each subsequent task removes one or
//! more `CarDesktopError::NotYetImplemented` call sites from this
//! module. Once CD-06 merges, no method in this backend returns
//! `NotYetImplemented` and the variant itself goes away.
//!
//! This module is gated on `cfg(target_os = "macos")` and pulls in
//! the required Apple framework bindings (`objc2-*`,
//! `core-graphics`, `core-foundation`, `accessibility-sys`) via the
//! crate's `[target.'cfg(target_os = "macos")'.dependencies]`. On
//! non-macOS targets this module is not compiled and `MacBackend`
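//! does not exist. Consumers should gate any use of `MacBackend` behind
//! `#[cfg(target_os = "macos")]` (a `cfg!(target_os = "macos")`
//! expression is not enough, because both of its branches must still
//! compile) and use `CarDesktopError::PlatformUnsupported` from the
//! non-macOS path.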

use async_trait::async_trait;

use crate::backend::DesktopBackend;
use crate::errors::{CarDesktopError, Result};
use crate::models::{
    ClickRequest, DisplayId, Frame, KeyPressRequest, PermissionRequest, PermissionSnapshot,
    TypeRequest, UiMap, WindowFilter, WindowHandle, WindowInfo,
};
use crate::safety::PerWindowRateLimiter;

pub mod accessibility;
pub mod capture;
pub mod input;
pub mod permissions;
pub mod windows;

/// Resolve an accessibility element id to its bounds and display name.
///
/// Performs a single keyed lookup in `map.a11y_by_id`. Returns `None`
/// when the id is not present in the map. A node without a name yields
/// an empty `String` so callers always receive a title to inspect.
/// `click` uses the bounds to derive the point to press and passes the
/// title on to `input::click_impl`.
fn find_element_bounds(map: &UiMap, element_id: &str) -> Option<(crate::models::Bounds, String)> {
    map.a11y_by_id.get(element_id).map(|node| {
        let label = node.name.clone().unwrap_or_default();
        (node.bounds, label)
    })
}

/// macOS `DesktopBackend` implementation.
///
/// `MacBackend::new()` is inexpensive; no framework initialization
/// happens until a method actually needs it. The per-window rate
/// limiter lives on the backend so it persists across calls.
pub struct MacBackend {
    // Consulted by `click`, `type_text`, and `keypress` (via `acquire`)
    // before any input is synthesized for a window.
    rate_limiter: PerWindowRateLimiter,
}

impl MacBackend {
    /// Build a backend that owns a freshly constructed per-window rate limiter.
    pub fn new() -> Self {
        let rate_limiter = PerWindowRateLimiter::new();
        Self { rate_limiter }
    }
}

impl Default for MacBackend {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl DesktopBackend for MacBackend {
    async fn list_windows(&self, filter: WindowFilter) -> Result<Vec<WindowInfo>> {
        // CGWindowList is a synchronous, potentially-expensive call
        // (allocates a CFArray, walks each window's IOKit service
        // for bundle info). Run it on the blocking pool so the
        // tokio reactor doesn't stall.
        tokio::task::spawn_blocking(move || windows::list_windows_impl(&filter))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn observe_window(&self, window: WindowHandle) -> Result<UiMap> {
        // Find the window's metadata so we can return a fully-
        // populated UiMap (frame + window info). The AX-tree half
        // lands in CD-04; until then we emit `a11y_empty = true`.
        let pid = window.pid;
        let wid = window.window_id;
        let all_windows = tokio::task::spawn_blocking(move || {
            windows::list_windows_impl(&crate::models::WindowFilter::by_pid(pid))
        })
        .await
        .map_err(|e| CarDesktopError::OsApi {
            detail: format!("spawn_blocking join error: {e}"),
            source: Some(Box::new(e)),
        })??;
        let info = all_windows
            .into_iter()
            .find(|w| w.handle.window_id == wid)
            .ok_or_else(|| CarDesktopError::WindowNotFound {
                detail: format!("window {}:{} not in the on-screen list", pid, wid),
            })?;

        let frame = tokio::task::spawn_blocking(move || capture::capture_window_impl(window))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })??;

        // CD-04: AX-tree walk. On Accessibility-permission denial
        // we fall back to returning a frame-only UiMap instead of
        // erroring — the capture still succeeded and the caller can
        // use screenshot-diff on the pixel frame.
        let window_for_ax = window;
        let ax_result =
            tokio::task::spawn_blocking(move || accessibility::walk_window_ax(window_for_ax))
                .await
                .map_err(|e| CarDesktopError::OsApi {
                    detail: format!("spawn_blocking join error: {e}"),
                    source: Some(Box::new(e)),
                })?;
        let mut map = crate::perception::empty_a11y_uimap(info);
        map.frame = Some(frame);
        match ax_result {
            Ok(out) => {
                map.a11y_root = Some(out.root);
                map.a11y_index = out.index;
                map.a11y_by_id = out.by_id;
                map.a11y_truncated = out.truncated;
                map.a11y_empty = false;
            }
            Err(CarDesktopError::PermissionDenied { .. }) => {
                // Accessibility denied → return frame-only UiMap
                // with a11y_empty = true. Callers (notably the
                // self-QA harness) fall back to screenshot-diff.
            }
            Err(other) => return Err(other),
        }
        Ok(map)
    }

    async fn capture_display(&self, display: DisplayId) -> Result<Frame> {
        tokio::task::spawn_blocking(move || capture::capture_display_impl(display))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn focus_window(&self, window: WindowHandle) -> Result<()> {
        tokio::task::spawn_blocking(move || windows::focus_window_impl(window))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn click(&self, request: ClickRequest) -> Result<()> {
        self.rate_limiter.acquire(request.window)?;
        // Resolve element_id → absolute point by re-observing the
        // window so we have a current AX tree. When the caller
        // supplied `point` directly, skip the observation entirely.
        let (point, ax_title) = if let Some(eid) = request.element_id.clone() {
            let map = self.observe_window(request.window).await?;
            let (bounds, title) =
                find_element_bounds(&map, &eid).ok_or(CarDesktopError::UnknownElement {
                    element_id: eid.clone(),
                })?;
            (bounds.center(), Some(title))
        } else if let Some(p) = request.point {
            (p, None)
        } else {
            return Err(CarDesktopError::OsApi {
                detail: "ClickRequest must carry either element_id or point".into(),
                source: None,
            });
        };
        // Frame clamp: the resolved point must lie inside the
        // target window's current frame.
        let pid_for_frame = request.window.pid;
        let wid_for_frame = request.window.window_id;
        let window_info = tokio::task::spawn_blocking(move || {
            windows::list_windows_impl(&crate::models::WindowFilter::by_pid(pid_for_frame))
        })
        .await
        .map_err(|e| CarDesktopError::OsApi {
            detail: format!("spawn_blocking join error: {e}"),
            source: Some(Box::new(e)),
        })??
        .into_iter()
        .find(|w| w.handle.window_id == wid_for_frame)
        .ok_or_else(|| CarDesktopError::WindowNotFound {
            detail: format!(
                "window {}:{} disappeared before click",
                pid_for_frame, wid_for_frame
            ),
        })?;
        if !window_info.frame.contains_point(point.0, point.1) {
            return Err(CarDesktopError::OutOfTargetWindow {
                x: point.0,
                y: point.1,
                frame: window_info.frame,
            });
        }
        // Activate the owning app so the click lands in the right
        // process's event queue.
        self.focus_window(request.window).await.ok();
        let cg_point = core_graphics::geometry::CGPoint::new(point.0, point.1);
        let ax_title_for_safety = ax_title.clone();
        tokio::task::spawn_blocking(move || {
            input::click_impl(request, cg_point, ax_title_for_safety)
        })
        .await
        .map_err(|e| CarDesktopError::OsApi {
            detail: format!("spawn_blocking join error: {e}"),
            source: Some(Box::new(e)),
        })?
    }

    async fn type_text(&self, request: TypeRequest) -> Result<()> {
        self.rate_limiter.acquire(request.window)?;
        self.focus_window(request.window).await.ok();
        tokio::task::spawn_blocking(move || input::type_text_impl(request))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn keypress(&self, request: KeyPressRequest) -> Result<()> {
        self.rate_limiter.acquire(request.window)?;
        self.focus_window(request.window).await.ok();
        tokio::task::spawn_blocking(move || input::keypress_impl(request))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn permissions(&self) -> Result<PermissionSnapshot> {
        tokio::task::spawn_blocking(permissions::permissions_impl)
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }

    async fn request_permissions(&self, needs: PermissionRequest) -> Result<PermissionSnapshot> {
        tokio::task::spawn_blocking(move || permissions::request_permissions_impl(needs))
            .await
            .map_err(|e| CarDesktopError::OsApi {
                detail: format!("spawn_blocking join error: {e}"),
                source: Some(Box::new(e)),
            })?
    }
}