car-desktop 0.9.0

OS-level screen capture, accessibility inspection, and input synthesis for Common Agent Runtime
Documentation
//! macOS window enumeration via `CGWindowListCopyWindowInfo` +
//! activation via `NSRunningApplication`.
//!
//! Populated in CD-02.
//!
//! ## Permissions caveat
//!
//! `CGWindowListCopyWindowInfo` without the Screen Recording TCC
//! permission will return window metadata (pid, layer, bounds) but
//! newer macOS releases (12.3+) redact the `kCGWindowName` field to
//! an empty string. Callers that need window titles should preflight
//! the permission via `MacBackend::permissions()` (CD-06) and prompt
//! the user. We do NOT fail the list call on missing permission —
//! returning degraded data is more useful than nothing when the
//! planner only needs to find a window by pid or bundle id.

use core_foundation::base::{CFType, TCFType};
use core_foundation::boolean::CFBoolean;
use core_foundation::dictionary::CFDictionary;
use core_foundation::number::CFNumber;
use core_foundation::string::CFString;
use core_graphics::window::{
    copy_window_info, kCGNullWindowID, kCGWindowBounds, kCGWindowIsOnscreen, kCGWindowLayer,
    kCGWindowListExcludeDesktopElements, kCGWindowListOptionOnScreenOnly, kCGWindowName,
    kCGWindowNumber, kCGWindowOwnerName, kCGWindowOwnerPID,
};
use objc2::rc::Retained;
use objc2_app_kit::{NSApplicationActivationOptions, NSRunningApplication};

use crate::errors::{CarDesktopError, Result};
use crate::models::{WindowFilter, WindowFrame, WindowHandle, WindowInfo};

/// List the on-screen windows reported by `CGWindowListCopyWindowInfo`,
/// keeping only the ones accepted by `filter`.
///
/// The query asks for on-screen windows and excludes desktop elements.
/// Entries that `window_info_from_dict` cannot decode are skipped. When
/// nothing matches the filter the result is an empty vector, not an error.
///
/// # Errors
///
/// Returns [`CarDesktopError::OsApi`] when the window-list call hands
/// back no array at all.
pub fn list_windows_impl(filter: &WindowFilter) -> Result<Vec<WindowInfo>> {
    let Some(entries) = copy_window_info(
        kCGWindowListOptionOnScreenOnly | kCGWindowListExcludeDesktopElements,
        kCGNullWindowID,
    ) else {
        return Err(CarDesktopError::OsApi {
            detail: "CGWindowListCopyWindowInfo returned null".into(),
            source: None,
        });
    };

    // The array is untyped (`CFArray<void>`), so each element is fetched
    // by index and re-wrapped as a typed dictionary below.
    let total = entries.len();
    let mut matched = Vec::with_capacity(total as usize);
    for index in 0..total {
        let Some(entry) = entries.get(index) else {
            continue;
        };
        let raw_entry = *entry as core_foundation::base::CFTypeRef;
        // SAFETY: the CFArray returned by CGWindowListCopyWindowInfo
        // is documented to carry CFDictionary entries with CFString
        // keys and CFType values. Get-rule wrapping takes its own
        // retain, so the array keeps ownership of its element.
        let window_dict: CFDictionary<CFString, CFType> =
            unsafe { CFDictionary::wrap_under_get_rule(raw_entry as _) };

        match window_info_from_dict(&window_dict) {
            Some(info) if filter.matches(&info) => matched.push(info),
            // Undecodable entry, or decoded but rejected by the filter.
            _ => {}
        }
    }

    Ok(matched)
}

/// Activate the application that owns the window behind `handle`.
///
/// Only `handle.pid` is consulted: the owning process is looked up as an
/// `NSRunningApplication` and asked to activate. Which of that
/// application's windows ends up in front is left to the OS — this
/// function does not target the individual window. Per the original
/// design note, that is almost always sufficient for Tokhn's own
/// subprocess in the self-QA loop, because the child spawns with exactly
/// one main window.
///
/// # Errors
///
/// * [`CarDesktopError::WindowNotFound`] when the pid cannot be
///   represented as a `pid_t` or no running application has that pid.
/// * [`CarDesktopError::OsApi`] when `activateWithOptions` reports
///   failure.
pub fn focus_window_impl(handle: WindowHandle) -> Result<()> {
    let not_found = || CarDesktopError::WindowNotFound {
        detail: format!("no running application for pid {}", handle.pid),
    };

    // Checked conversion: a plain `as` cast would wrap an out-of-range
    // pid into a negative `pid_t` and query the OS with a value the
    // caller never supplied. Such a pid cannot name a running app.
    let pid = libc::pid_t::try_from(handle.pid).map_err(|_| not_found())?;

    // SAFETY: runningApplicationWithProcessIdentifier is safe to call
    // for any pid value; it returns None on unknown pids. The returned
    // Retained<NSRunningApplication> owns its refcount.
    let app: Retained<NSRunningApplication> =
        unsafe { NSRunningApplication::runningApplicationWithProcessIdentifier(pid) }
            .ok_or_else(not_found)?;

    // SAFETY: activateWithOptions is safe to invoke on any live
    // NSRunningApplication instance; the method documents its side
    // effects (activating the app / bringing it forward) and never
    // panics.
    let activated: bool = unsafe {
        app.activateWithOptions(NSApplicationActivationOptions::NSApplicationActivateIgnoringOtherApps)
    };
    if activated {
        Ok(())
    } else {
        Err(CarDesktopError::OsApi {
            detail: format!(
                "NSRunningApplication.activateWithOptions returned false for pid {}",
                handle.pid
            ),
            source: None,
        })
    }
}

// ─── Dictionary extraction helpers ─────────────────────────────

/// Decode one window-description dictionary into a [`WindowInfo`].
///
/// The owner pid and the window number are mandatory: if either is
/// missing or is not a CFNumber the entry is rejected with `None`.
/// Every other field degrades to a default instead — empty strings for
/// the title and owner name, layer `0`, an all-zero frame, and
/// `on_screen = true`.
fn window_info_from_dict(dict: &CFDictionary<CFString, CFType>) -> Option<WindowInfo> {
    // The `kCGWindow*` keys are extern statics, hence the `unsafe`
    // blocks around each read.
    let owner_pid = cfnumber_value(dict, unsafe { kCGWindowOwnerPID })? as u32;
    let number = cfnumber_value(dict, unsafe { kCGWindowNumber })? as u64;

    let frame = match window_frame_from_bounds(dict) {
        Some(decoded) => decoded,
        None => WindowFrame {
            x: 0.0,
            y: 0.0,
            width: 0.0,
            height: 0.0,
        },
    };

    Some(WindowInfo {
        handle: WindowHandle::new(owner_pid, number),
        title: cfstring_value(dict, unsafe { kCGWindowName }).unwrap_or_default(),
        bundle_id: bundle_id_for_pid(owner_pid),
        owner_name: cfstring_value(dict, unsafe { kCGWindowOwnerName }).unwrap_or_default(),
        frame,
        layer: cfnumber_value(dict, unsafe { kCGWindowLayer }).map_or(0, |raw| raw as i32),
        on_screen: cfboolean_value(dict, unsafe { kCGWindowIsOnscreen }).unwrap_or(true),
    })
}

/// Look up the bundle identifier of the application running as `pid`.
///
/// Returns `None` when `pid` does not fit in a `pid_t`, when no running
/// application has that pid, or when the application reports no bundle
/// identifier.
fn bundle_id_for_pid(pid: u32) -> Option<String> {
    // Checked conversion: an `as` cast would wrap values above
    // `pid_t::MAX` into negative pids and query the OS with a value the
    // caller never passed.
    let pid = libc::pid_t::try_from(pid).ok()?;

    // SAFETY: runningApplicationWithProcessIdentifier is always safe to
    // call and returns None on unknown pids.
    let app: Retained<NSRunningApplication> =
        unsafe { NSRunningApplication::runningApplicationWithProcessIdentifier(pid) }?;
    let bundle_id = unsafe { app.bundleIdentifier() }?;
    // SAFETY: NSString → UTF-8 String conversion is memory-safe as
    // long as the NSString is live; the Retained borrow keeps it
    // alive for the duration of this scope.
    Some(unsafe { bundle_id.to_string() })
}

/// Read the `kCGWindowBounds` sub-dictionary of a window description and
/// turn its `X` / `Y` / `Width` / `Height` numbers into a [`WindowFrame`].
///
/// Returns `None` when the bounds entry is absent, is not a dictionary,
/// or lacks any of the four numeric components.
fn window_frame_from_bounds(dict: &CFDictionary<CFString, CFType>) -> Option<WindowFrame> {
    let bounds_key = unsafe { CFString::wrap_under_get_rule(kCGWindowBounds) };
    // The downcast yields the default untyped
    // `CFDictionary<*const c_void, *const c_void>` form.
    let raw_bounds: CFDictionary = dict.find(&bounds_key)?.downcast::<CFDictionary>()?;
    // Re-wrap the same CFDictionaryRef with CFString/CFType parameters
    // so the component lookups can use the typed helper.
    let rect: CFDictionary<CFString, CFType> =
        unsafe { CFDictionary::wrap_under_get_rule(raw_bounds.as_concrete_TypeRef()) };

    let component = |name: &str| cfnumber_value_str(&rect, name);
    Some(WindowFrame {
        x: component("X")?,
        y: component("Y")?,
        width: component("Width")?,
        height: component("Height")?,
    })
}

/// Look up `key_ref` in `dict` and return the stored CFNumber as `f64`.
///
/// The float conversion is tried first; if it yields nothing the number
/// is read as `i64` and widened. Returns `None` when the key is absent
/// or the value is not a CFNumber.
fn cfnumber_value(
    dict: &CFDictionary<CFString, CFType>,
    key_ref: core_foundation::string::CFStringRef,
) -> Option<f64> {
    // Relies on `key_ref` being a valid CFStringRef; get-rule wrapping
    // takes its own retain rather than consuming the caller's reference.
    let lookup_key = unsafe { CFString::wrap_under_get_rule(key_ref) };
    let number: CFNumber = dict.find(&lookup_key)?.downcast::<CFNumber>()?;
    match number.to_f64() {
        Some(as_float) => Some(as_float),
        None => number.to_i64().map(|as_int| as_int as f64),
    }
}

/// Same lookup as `cfnumber_value`, but the key is given as a Rust
/// string and turned into a fresh `CFString` first.
///
/// Returns `None` when the key is absent or the value is not a CFNumber.
fn cfnumber_value_str(dict: &CFDictionary<CFString, CFType>, key_str: &str) -> Option<f64> {
    let lookup_key = CFString::new(key_str);
    let number: CFNumber = dict.find(&lookup_key)?.downcast::<CFNumber>()?;
    // Prefer the float reading; fall back to the integer reading.
    match number.to_f64() {
        Some(as_float) => Some(as_float),
        None => number.to_i64().map(|as_int| as_int as f64),
    }
}

/// Look up `key_ref` in `dict` and return the stored CFString as an
/// owned Rust `String`.
///
/// Returns `None` when the key is absent or the value is not a CFString.
fn cfstring_value(
    dict: &CFDictionary<CFString, CFType>,
    key_ref: core_foundation::string::CFStringRef,
) -> Option<String> {
    // Relies on `key_ref` being a valid CFStringRef; get-rule wrapping
    // takes its own retain rather than consuming the caller's reference.
    let lookup_key = unsafe { CFString::wrap_under_get_rule(key_ref) };
    dict.find(&lookup_key)?
        .downcast::<CFString>()
        .map(|text| text.to_string())
}

/// Look up `key_ref` in `dict` and return the stored CFBoolean as a
/// Rust `bool`.
///
/// Returns `None` when the key is absent or the value is not a CFBoolean.
fn cfboolean_value(
    dict: &CFDictionary<CFString, CFType>,
    key_ref: core_foundation::string::CFStringRef,
) -> Option<bool> {
    // Relies on `key_ref` being a valid CFStringRef; get-rule wrapping
    // takes its own retain rather than consuming the caller's reference.
    let lookup_key = unsafe { CFString::wrap_under_get_rule(key_ref) };
    dict.find(&lookup_key)?
        .downcast::<CFBoolean>()
        .map(|flag| flag.into())
}