// car-desktop 0.15.1
//
// OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
// Documentation
//! The `DesktopBackend` trait — the abstract interface every
//! platform-specific backend implements. Shape mirrors
//! `car-browser::BrowserBackend` so the executor's OPA loop can
//! treat `desktop_*` and `browse_*` tools as siblings.

use async_trait::async_trait;

use crate::errors::Result;
use crate::models::{
    ClickRequest, DisplayId, Frame, KeyPressRequest, PermissionRequest, PermissionSnapshot,
    TypeRequest, UiMap, WindowFilter, WindowHandle, WindowInfo,
};

/// Abstract interface implemented by every platform-specific desktop
/// backend.
///
/// The methods fall into three groups:
///
/// * discovery and observation — `list_windows`, `observe_window`,
///   `capture_display`;
/// * focus and input synthesis — `focus_window`, `click`, `type_text`,
///   `keypress`;
/// * permission handling — `permissions`, `request_permissions`.
///
/// Implementors must be `Send + Sync`. Every method is `async` (via
/// `#[async_trait]`) and reports failure through the
/// `crate::errors::Result` alias.
#[async_trait]
pub trait DesktopBackend: Send + Sync {
    /// Enumerate windows on the current user session matching `filter`.
    ///
    /// Returns an empty list (not an error) when nothing matches.
    async fn list_windows(&self, filter: WindowFilter) -> Result<Vec<WindowInfo>>;

    /// Capture both pixels and the accessibility tree for a specific
    /// window.
    ///
    /// The returned `UiMap` may carry `a11y_empty = true` when the
    /// target window renders its own UI without exposing AX (notably:
    /// Tokhn's Bevy GUI). Pixel capture is always attempted; callers
    /// that don't need pixels can ignore `frame`.
    async fn observe_window(&self, window: WindowHandle) -> Result<UiMap>;

    /// Capture a full display's framebuffer with no AX tree.
    ///
    /// Useful for multi-window screenshots and for the 5-user study
    /// session recording (Sprint 3).
    async fn capture_display(&self, display: DisplayId) -> Result<Frame>;

    /// Bring a window to the front and grant it keyboard focus.
    ///
    /// On macOS this also activates the owning app via
    /// NSRunningApplication.
    async fn focus_window(&self, window: WindowHandle) -> Result<()>;

    /// Synthesize a mouse click.
    ///
    /// The click is clamped to the target window's frame. Destructive
    /// targets (those matching the regex in
    /// `safety::DESTRUCTIVE_LABEL_RE`) require `unsafe_ok: true`.
    async fn click(&self, request: ClickRequest) -> Result<()>;

    /// Type text into the currently-focused element of the target
    /// window.
    async fn type_text(&self, request: TypeRequest) -> Result<()>;

    /// Press a logical key with optional modifiers.
    async fn keypress(&self, request: KeyPressRequest) -> Result<()>;

    /// Return the current TCC permission state without prompting.
    ///
    /// Safe to call at any time, including app startup, as a gate for
    /// the first-run UX.
    async fn permissions(&self) -> Result<PermissionSnapshot>;

    /// Trigger the OS permission prompt(s) for the permissions named
    /// in `needs`.
    ///
    /// On macOS this redirects the user to System Settings and — for
    /// Screen Recording — does not take effect until the app is quit
    /// and relaunched. The returned snapshot's `needs_restart` flag
    /// indicates when that relaunch is required.
    async fn request_permissions(&self, needs: PermissionRequest) -> Result<PermissionSnapshot>;
}