car-desktop 0.9.0

OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
Documentation
//! macOS input synthesis via CGEvent.
//!
//! Implements `click`, `type_text`, and `keypress` for
//! `MacBackend`. Every safety rule from docs/CAR_DESKTOP.md §
//! "Safety model" is active here — target-window frame clamping,
//! destructive-label gating, rate limiting (shared with
//! `safety::PerWindowRateLimiter`), dry-run short-circuit, 8ms
//! post-event settle, and the Esc-Esc kill switch. None of them
//! are configurable to disable.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::sync::OnceLock;

use core_foundation::base::TCFType;
use core_foundation::runloop::{kCFRunLoopCommonModes, CFRunLoop};
use core_graphics::event::{
    CGEvent, CGEventFlags, CGEventTap, CGEventTapLocation, CGEventTapOptions,
    CGEventTapPlacement, CGEventType, CGKeyCode, CGMouseButton,
};
use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
use core_graphics::geometry::CGPoint;

use crate::errors::{CarDesktopError, Result};
use crate::models::{
    ClickRequest, Key, KeyPressRequest, Modifier, MouseButton, TypeRequest, WindowHandle,
};
use crate::safety::{destructive_word_in, POST_EVENT_SETTLE};

/// Global kill switch set by the Esc-Esc listener. The next
/// `check_kill_switch` call that observes it returns
/// `KillSwitchActivated` and clears the flag, so one Esc-Esc
/// aborts the input call that sees it rather than latching.
static KILL_SWITCH: AtomicBool = AtomicBool::new(false);

/// One-time setup for the event tap that watches for Esc-Esc and
/// flips `KILL_SWITCH`. First call to click/type/keypress
/// initializes the tap on a dedicated thread so the tokio runtime
/// is unaffected.
static TAP_INIT: OnceLock<()> = OnceLock::new();

/// Perform a click after every safety gate. Rate-limit acquisition
/// happens in `MacBackend::click` before calling this; this
/// function handles the remaining five rules (target-window frame
/// clamp, destructive-label gate, dry-run, kill-switch check,
/// 8ms settle) plus the actual CGEvent post.
pub fn click_impl(
    request: ClickRequest,
    resolved_point: CGPoint,
    matched_ax_title: Option<String>,
) -> Result<()> {
    ensure_kill_switch_tap_installed();
    check_kill_switch()?;

    // Destructive-label gate. If the resolved AX title carries a
    // destructive word, require `unsafe_ok: true`.
    if !request.unsafe_ok {
        if let Some(title) = matched_ax_title.as_deref() {
            if let Some(word) = destructive_word_in(title) {
                return Err(CarDesktopError::DestructiveActionGated {
                    label: format!("{title} (matched '{word}')"),
                });
            }
        }
    }

    if request.dry_run {
        tracing::info!(
            target = "car_desktop::click",
            x = resolved_point.x,
            y = resolved_point.y,
            button = ?request.button,
            pid = request.window.pid,
            window = request.window.window_id,
            "dry-run: skipping CGEventPost"
        );
        return Ok(());
    }

    let button = mouse_button_to_cg(request.button);
    let down_type = match request.button {
        MouseButton::Left => CGEventType::LeftMouseDown,
        MouseButton::Right => CGEventType::RightMouseDown,
        MouseButton::Middle => CGEventType::OtherMouseDown,
    };
    let up_type = match request.button {
        MouseButton::Left => CGEventType::LeftMouseUp,
        MouseButton::Right => CGEventType::RightMouseUp,
        MouseButton::Middle => CGEventType::OtherMouseUp,
    };

    let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState).map_err(|_| {
        CarDesktopError::OsApi {
            detail: "CGEventSourceCreate returned null".into(),
            source: None,
        }
    })?;
    let flags = modifiers_to_cg_flags(&request.modifiers);

    let down_event =
        CGEvent::new_mouse_event(source.clone(), down_type, resolved_point, button).map_err(
            |_| CarDesktopError::OsApi {
                detail: "CGEventCreateMouseEvent down returned null".into(),
                source: None,
            },
        )?;
    down_event.set_flags(flags);
    down_event.post(CGEventTapLocation::HID);
    std::thread::sleep(POST_EVENT_SETTLE);

    let up_event = CGEvent::new_mouse_event(source, up_type, resolved_point, button).map_err(
        |_| CarDesktopError::OsApi {
            detail: "CGEventCreateMouseEvent up returned null".into(),
            source: None,
        },
    )?;
    up_event.set_flags(flags);
    up_event.post(CGEventTapLocation::HID);
    std::thread::sleep(POST_EVENT_SETTLE);

    check_kill_switch()?;
    Ok(())
}

/// Type `request.text` into the focused application as Unicode
/// keyboard input.
///
/// The text is delivered via `CGEvent::set_string`, which attaches
/// the characters to a synthetic key-down event, so no per-keycode
/// emulation is needed and the result does not depend on the
/// active keyboard layout. Long text is split across several
/// events (see `MAX_UTF16_UNITS_PER_EVENT`); each event is followed
/// by a `POST_EVENT_SETTLE` pause and a kill-switch check, so an
/// Esc-Esc can interrupt a long string between chunks.
///
/// Empty text is a no-op: no event is posted.
///
/// # Errors
///
/// * `KillSwitchActivated` if the kill switch is set on entry or
///   is observed after any chunk has been posted.
/// * `OsApi` if the event source or a keyboard event cannot be
///   created.
pub fn type_text_impl(request: TypeRequest) -> Result<()> {
    /// Upper bound on UTF-16 code units carried by one event.
    /// CGEventKeyboardSetUnicodeString is widely reported to
    /// deliver only the first 20 units of an event's string, so
    /// longer text has to be spread over multiple events.
    const MAX_UTF16_UNITS_PER_EVENT: usize = 20;

    ensure_kill_switch_tap_installed();
    check_kill_switch()?;

    if request.dry_run {
        tracing::info!(
            target = "car_desktop::type_text",
            len = request.text.len(),
            pid = request.window.pid,
            "dry-run: skipping CGEventPost"
        );
        return Ok(());
    }

    let text: &str = &request.text;
    if text.is_empty() {
        // Nothing to type; avoid posting a bare key-down for
        // virtual keycode 0 with no text attached.
        return Ok(());
    }

    let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState).map_err(|_| {
        CarDesktopError::OsApi {
            detail: "CGEventSourceCreate returned null".into(),
            source: None,
        }
    })?;

    for chunk in utf16_bounded_chunks(text, MAX_UTF16_UNITS_PER_EVENT) {
        // Keycode 0 is a placeholder; the attached Unicode string
        // is what the receiving application inserts.
        let event = CGEvent::new_keyboard_event(source.clone(), 0, true).map_err(|_| {
            CarDesktopError::OsApi {
                detail: "CGEventCreateKeyboardEvent returned null".into(),
                source: None,
            }
        })?;
        event.set_string(chunk);
        event.post(CGEventTapLocation::HID);
        std::thread::sleep(POST_EVENT_SETTLE);
        check_kill_switch()?;
    }
    Ok(())
}

/// Split `text` into consecutive slices of at most `max_units`
/// UTF-16 code units each, cutting only on `char` boundaries so a
/// surrogate pair is never divided. A single char wider than
/// `max_units` still gets a slice of its own.
fn utf16_bounded_chunks(text: &str, max_units: usize) -> Vec<&str> {
    let mut chunks = Vec::new();
    let mut start = 0;
    let mut units = 0;
    for (idx, ch) in text.char_indices() {
        let width = ch.len_utf16();
        if units > 0 && units + width > max_units {
            chunks.push(&text[start..idx]);
            start = idx;
            units = 0;
        }
        units += width;
    }
    if start < text.len() {
        chunks.push(&text[start..]);
    }
    chunks
}

/// Synthesize a single key press (down, then up) for `request.key`.
///
/// `request.modifiers` are applied as event flags on both the
/// key-down and the key-up event; no separate modifier-key events
/// are posted. Each posted event is followed by a
/// `POST_EVENT_SETTLE` pause.
///
/// # Errors
///
/// * `KillSwitchActivated` if the kill switch is set on entry or
///   after the key press has been posted.
/// * Whatever `logical_key_to_keycode` returns for a key it cannot
///   map (checked before the dry-run short-circuit, so dry runs
///   report unmappable keys too).
/// * `OsApi` if the event source or either keyboard event cannot
///   be created. Both events are created before anything is
///   posted, so this error never leaves the key held down.
pub fn keypress_impl(request: KeyPressRequest) -> Result<()> {
    ensure_kill_switch_tap_installed();
    check_kill_switch()?;

    let keycode = logical_key_to_keycode(request.key)?;

    if request.dry_run {
        tracing::info!(
            target = "car_desktop::keypress",
            key = ?request.key,
            keycode,
            pid = request.window.pid,
            "dry-run: skipping CGEventPost"
        );
        return Ok(());
    }

    let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState).map_err(|_| {
        CarDesktopError::OsApi {
            detail: "CGEventSourceCreate returned null".into(),
            source: None,
        }
    })?;
    let flags = modifiers_to_cg_flags(&request.modifiers);

    // Build BOTH events before posting either one: a key-down
    // without its matching key-up would leave the key stuck.
    let down = CGEvent::new_keyboard_event(source.clone(), keycode, true).map_err(|_| {
        CarDesktopError::OsApi {
            detail: "CGEventCreateKeyboardEvent down returned null".into(),
            source: None,
        }
    })?;
    let up = CGEvent::new_keyboard_event(source, keycode, false).map_err(|_| {
        CarDesktopError::OsApi {
            detail: "CGEventCreateKeyboardEvent up returned null".into(),
            source: None,
        }
    })?;
    down.set_flags(flags);
    up.set_flags(flags);

    down.post(CGEventTapLocation::HID);
    std::thread::sleep(POST_EVENT_SETTLE);

    up.post(CGEventTapLocation::HID);
    std::thread::sleep(POST_EVENT_SETTLE);

    // Report a kill switch that fired while the press was in flight.
    check_kill_switch()?;
    Ok(())
}

// ─── Helpers ───────────────────────────────────────────────────

fn mouse_button_to_cg(b: MouseButton) -> CGMouseButton {
    match b {
        MouseButton::Left => CGMouseButton::Left,
        MouseButton::Right => CGMouseButton::Right,
        MouseButton::Middle => CGMouseButton::Center,
    }
}

/// Combine a list of logical modifiers into a single
/// `CGEventFlags` bitmask. An empty list yields
/// `CGEventFlagNull`; duplicates are harmless because the flags
/// are OR-ed together.
fn modifiers_to_cg_flags(mods: &[Modifier]) -> CGEventFlags {
    mods.iter()
        .map(|modifier| match modifier {
            Modifier::Shift => CGEventFlags::CGEventFlagShift,
            Modifier::Control => CGEventFlags::CGEventFlagControl,
            Modifier::Alt => CGEventFlags::CGEventFlagAlternate,
            Modifier::Meta => CGEventFlags::CGEventFlagCommand,
        })
        .fold(CGEventFlags::CGEventFlagNull, |combined, flag| {
            combined | flag
        })
}

/// Resolve a logical `Key` to a macOS virtual keycode.
///
/// The numbers are the ANSI-US QWERTY virtual keycodes from
/// Apple's HIToolbox/Events.h. Virtual keycodes identify a key
/// position, so under another keyboard layout the receiving app
/// interprets the press through its active input source.
///
/// `Key::Char` is delegated to `char_to_keycode`, whose error is
/// returned unchanged; every other variant always succeeds.
fn logical_key_to_keycode(k: Key) -> Result<CGKeyCode> {
    let code: CGKeyCode = match k {
        // Character keys have their own lookup table.
        Key::Char(c) => return char_to_keycode(c),

        // Editing and whitespace.
        Key::Return => 36,
        Key::Tab => 48,
        Key::Space => 49,
        Key::Backspace => 51,
        Key::Escape => 53,
        Key::Delete => 117,

        // Navigation.
        Key::Home => 115,
        Key::PageUp => 116,
        Key::End => 119,
        Key::PageDown => 121,
        Key::ArrowLeft => 123,
        Key::ArrowRight => 124,
        Key::ArrowDown => 125,
        Key::ArrowUp => 126,

        // Function row (the codes are not contiguous).
        Key::F1 => 122,
        Key::F2 => 120,
        Key::F3 => 99,
        Key::F4 => 118,
        Key::F5 => 96,
        Key::F6 => 97,
        Key::F7 => 98,
        Key::F8 => 100,
        Key::F9 => 101,
        Key::F10 => 109,
        Key::F11 => 103,
        Key::F12 => 111,

        // Punctuation with dedicated variants.
        Key::Comma => 43,
        Key::Slash => 44,
        Key::Period => 47,
    };
    Ok(code)
}

/// Map an ASCII character to its ANSI-US QWERTY virtual keycode.
///
/// Letters are matched case-insensitively and always resolve to
/// the bare key; this function adds no Shift, so callers wanting
/// a shifted character must pass the modifier themselves. The
/// characters `,`, `.`, `/` and space resolve to the same keycodes
/// as `Key::Comma`, `Key::Period`, `Key::Slash` and `Key::Space`,
/// so both spellings of those keys behave identically.
///
/// # Errors
///
/// Returns `UnsupportedCharacter` (carrying the codepoint) for any
/// character without an entry here, including all non-ASCII
/// characters — those belong in `type_text`, which sends Unicode
/// text and needs no keycode.
fn char_to_keycode(c: char) -> Result<CGKeyCode> {
    // Entries are listed in ascending keycode order, matching
    // Apple's HIToolbox/Events.h.
    Ok(match c.to_ascii_lowercase() {
        'a' => 0, 's' => 1, 'd' => 2, 'f' => 3, 'h' => 4, 'g' => 5,
        'z' => 6, 'x' => 7, 'c' => 8, 'v' => 9, 'b' => 11, 'q' => 12,
        'w' => 13, 'e' => 14, 'r' => 15, 'y' => 16, 't' => 17, '1' => 18,
        '2' => 19, '3' => 20, '4' => 21, '6' => 22, '5' => 23, '=' => 24,
        '9' => 25, '7' => 26, '-' => 27, '8' => 28, '0' => 29, ']' => 30,
        'o' => 31, 'u' => 32, '[' => 33, 'i' => 34, 'p' => 35, 'l' => 37,
        'j' => 38, '\'' => 39, 'k' => 40, ';' => 41, '\\' => 42,
        ',' => 43, '/' => 44, 'n' => 45, 'm' => 46, '.' => 47,
        ' ' => 49, '`' => 50,
        other => {
            return Err(CarDesktopError::UnsupportedCharacter {
                codepoint: other as u32,
            });
        }
    })
}

// ─── Kill switch — Esc-Esc within 500ms ────────────────────────

/// Fail with `KillSwitchActivated` if the Esc-Esc listener has set
/// `KILL_SWITCH`, clearing the flag in the process.
///
/// Clearing on observation means a later call can succeed without
/// the event tap having to be registered again.
fn check_kill_switch() -> Result<()> {
    if !KILL_SWITCH.load(Ordering::SeqCst) {
        return Ok(());
    }
    // Consume the activation before reporting it.
    KILL_SWITCH.store(false, Ordering::SeqCst);
    Err(CarDesktopError::KillSwitchActivated)
}

fn ensure_kill_switch_tap_installed() {
    TAP_INIT.get_or_init(install_kill_switch_tap);
}

fn install_kill_switch_tap() {
    // Spawn the tap on a dedicated thread so the CFRunLoop owns it
    // for the process lifetime. We don't hold a handle because the
    // KILL_SWITCH static is the only communication channel we need.
    std::thread::Builder::new()
        .name("car-desktop-kill-switch".into())
        .spawn(|| {
            // Esc-Esc tracking state. `last_esc` is captured inside
            // the tap callback via Arc<AtomicI64>. When a second
            // Esc arrives within 500ms of a first, flip KILL_SWITCH.
            let last_esc_ms: Arc<std::sync::atomic::AtomicI64> =
                Arc::new(std::sync::atomic::AtomicI64::new(0));
            let last_esc_for_tap = Arc::clone(&last_esc_ms);

            let tap = CGEventTap::new(
                CGEventTapLocation::HID,
                CGEventTapPlacement::HeadInsertEventTap,
                CGEventTapOptions::ListenOnly,
                vec![CGEventType::KeyDown],
                move |_proxy, _etype, event| {
                    let keycode = event.get_integer_value_field(
                        core_graphics::event::EventField::KEYBOARD_EVENT_KEYCODE,
                    ) as i32;
                    if keycode == 53 {
                        // Esc keycode = 53.
                        let now_ms = chrono::Utc::now().timestamp_millis();
                        let prev = last_esc_for_tap.load(Ordering::SeqCst);
                        if prev != 0 && (now_ms - prev) <= 500 {
                            KILL_SWITCH.store(true, Ordering::SeqCst);
                            last_esc_for_tap.store(0, Ordering::SeqCst);
                        } else {
                            last_esc_for_tap.store(now_ms, Ordering::SeqCst);
                        }
                    }
                    None
                },
            );
            let Ok(tap) = tap else {
                // Tap creation needs Accessibility permission.
                // Without it the kill switch doesn't install;
                // input synthesis still works (this is a safety
                // fallback, not a gate). Log once and exit.
                tracing::warn!(
                    target = "car_desktop::kill_switch",
                    "Esc-Esc kill switch tap failed to install (Accessibility permission required); safety falls back to per-call rate limit + frame clamp",
                );
                return;
            };
            // Attach to the current thread's run loop and enter it.
            let source = tap.mach_port.create_runloop_source(0).ok();
            if let Some(source) = source {
                unsafe {
                    let cur = CFRunLoop::get_current();
                    cur.add_source(&source, kCFRunLoopCommonModes);
                }
                tap.enable();
                CFRunLoop::run_current();
            }
        })
        .ok();
}