rsclaw-agent 0.1.0

Agent crate for RsClaw — internal workspace crate, not for direct use
//! Platform-specific helpers — Chrome detection, key mapping, display checks.
//!
//! Extracted from `runtime.rs` to reduce file size.

use anyhow::Result;

/// Report whether a graphical display can be assumed to exist.
///
/// macOS and Windows builds always answer `true`. Every other target answers
/// `true` only when `DISPLAY` or `WAYLAND_DISPLAY` is set to a valid Unicode
/// value in the process environment.
pub(crate) fn has_display() -> bool {
    if cfg!(any(target_os = "macos", target_os = "windows")) {
        return true;
    }
    // Checked in this order: X11 first, then Wayland.
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}

/// Ratio of physical pixels to logical points for the display.
///
/// The screenshot pipeline reports sizes in physical pixels, whereas cliclick
/// and `screencapture -R` on macOS expect *logical* point coordinates. Click
/// handlers therefore divide incoming physical-pixel coordinates by this
/// value before invoking those tools. On macOS the factor is 2.0 for Retina
/// panels and 1.0 otherwise.
///
/// The value is computed by `detect_display_logical_scale` on first use and
/// then served from a process-wide cache, so the (potentially slow) probe
/// runs at most once. A failed probe yields 1.0, which keeps non-HiDPI Macs
/// and other platforms correct.
pub(crate) fn display_logical_scale() -> f64 {
    static SCALE: std::sync::OnceLock<f64> = std::sync::OnceLock::new();
    let cached = SCALE.get_or_init(detect_display_logical_scale);
    *cached
}

/// Probe the macOS backing-scale factor (physical pixels per logical point).
///
/// The logical desktop width comes from Finder via `osascript` (e.g. the
/// bounds `0, 0, 1512, 982` on a 14" M1 Pro give 1512). The physical width is
/// read from the IHDR chunk of a transient `screencapture` PNG. The result is
/// `physical / logical`, clamped to at least 1.0.
///
/// Returns 1.0 whenever any step fails: osascript error, unparsable or
/// non-positive width, screencapture failure, or an unreadable / non-PNG
/// header.
///
/// NOTE(review): with several displays attached the Finder desktop bounds may
/// span all of them while the capture covers one display, which would
/// under-estimate the ratio (then clamped to 1.0) — confirm on a
/// multi-monitor setup.
#[cfg(target_os = "macos")]
fn detect_display_logical_scale() -> f64 {
    use std::io::Read;

    /// Fixed 8-byte signature that starts every PNG file.
    const PNG_SIGNATURE: [u8; 8] = [0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1A, b'\n'];

    let logical_w = std::process::Command::new("osascript")
        .args([
            "-e",
            "tell application \"Finder\" to set b to bounds of window of desktop",
            "-e",
            "return item 3 of b",
        ])
        .output()
        .ok()
        .filter(|o| o.status.success())
        .and_then(|o| {
            String::from_utf8_lossy(&o.stdout)
                .trim()
                .parse::<f64>()
                .ok()
        });

    let logical_w = match logical_w {
        Some(w) if w > 0.0 => w,
        _ => return 1.0,
    };

    // The process id keeps concurrent rsclaw processes from clobbering each
    // other's probe file.
    let tmp = std::env::temp_dir().join(format!("rsclaw_dpi_probe_{}.png", std::process::id()));
    let captured = std::process::Command::new("screencapture")
        .args(["-x", "-t", "png"])
        .arg(&tmp)
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false);

    // Only the first 24 bytes matter (signature + IHDR length/tag + width),
    // so avoid loading the whole screenshot into memory.
    let mut header = [0u8; 24];
    let header_read = captured
        && std::fs::File::open(&tmp)
            .and_then(|mut f| f.read_exact(&mut header))
            .is_ok();

    // Best-effort cleanup on every path, including a failed capture that may
    // still have left a partial file behind.
    let _ = std::fs::remove_file(&tmp);

    // Reject anything that is not a PNG starting with an IHDR chunk; otherwise
    // bytes 16..20 would be interpreted as a width regardless of content.
    if !header_read || !header.starts_with(&PNG_SIGNATURE) || header[12..16] != *b"IHDR" {
        return 1.0;
    }

    // IHDR width: big-endian u32 at offset 16.
    let physical_w = f64::from(u32::from_be_bytes([
        header[16], header[17], header[18], header[19],
    ]));
    if physical_w > 0.0 {
        (physical_w / logical_w).max(1.0)
    } else {
        1.0
    }
}

/// Non-macOS scale probe: always the identity factor.
#[cfg(not(target_os = "macos"))]
fn detect_display_logical_scale() -> f64 {
    // On Linux/X11 and most Windows configurations the input simulators
    // already work in physical pixels, so coordinates pass through unscaled.
    const PHYSICAL_PIXEL_SCALE: f64 = 1.0;
    PHYSICAL_PIXEL_SCALE
}

// detect_chrome/detect_ffmpeg lifted to rsclaw-platform (crate-split); re-exported.
pub use rsclaw_platform::{detect_chrome, detect_ffmpeg};

/// Locate a Chrome binary, installing Chrome for Testing when none is found.
///
/// A successful `detect_chrome` returns immediately. Otherwise the function
/// runs `rsclaw_tools::cmd_install("chrome", false)` (a ~150MB download, so
/// the first call is slow) and detects again.
///
/// # Errors
///
/// Propagates the installer's error, and fails if Chrome is still not
/// detected after the install finished.
pub async fn ensure_chrome() -> Result<String> {
    match detect_chrome() {
        Some(path) => Ok(path),
        None => {
            tracing::info!("Chrome not found locally, auto-installing Chrome for Testing");
            rsclaw_tools::cmd_install("chrome", false).await?;
            match detect_chrome() {
                Some(path) => Ok(path),
                None => Err(anyhow::anyhow!(
                    "Chrome auto-install completed but binary still not detected"
                )),
            }
        }
    }
}


/// Locate an ffmpeg binary, installing it (~80MB download) when none is found.
///
/// # Errors
///
/// Propagates the installer's error, and fails if ffmpeg is still not
/// detected after the install finished.
pub(crate) async fn ensure_ffmpeg() -> Result<String> {
    match detect_ffmpeg() {
        Some(path) => Ok(path),
        None => {
            tracing::info!("ffmpeg not found locally, auto-installing");
            rsclaw_tools::cmd_install("ffmpeg", false).await?;
            match detect_ffmpeg() {
                Some(path) => Ok(path),
                None => Err(anyhow::anyhow!(
                    "ffmpeg auto-install completed but binary still not detected"
                )),
            }
        }
    }
}

/// Spawn `cmd` with `args`, wait for it to exit, and report failure as an error.
///
/// Output is captured rather than inherited. On Windows the child is created
/// with flag `0x08000000` (CREATE_NO_WINDOW) so no console window appears.
///
/// # Errors
///
/// Fails if the process cannot be spawned, or if it exits unsuccessfully; in
/// the latter case the error message carries the child's stderr.
pub(crate) async fn run_subprocess(cmd: &str, args: &[&str]) -> Result<()> {
    let mut command = tokio::process::Command::new(cmd);
    command.args(args);
    #[cfg(windows)]
    {
        use std::os::windows::process::CommandExt;
        command.creation_flags(0x08000000);
    }

    let finished = match command.output().await {
        Ok(out) => out,
        Err(e) => return Err(anyhow::anyhow!("{cmd}: {e}")),
    };
    if finished.status.success() {
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&finished.stderr);
    Err(anyhow::anyhow!("{cmd} failed: {stderr}"))
}

/// Parse JPEG dimensions from the first start-of-frame (SOFn) segment, with
/// no external dependencies.
///
/// Walks the marker stream, skipping length-prefixed segments, until a frame
/// header is found. All frame types are recognised (SOF0–SOF3, SOF5–SOF7,
/// SOF9–SOF11, SOF13–SOF15), not just baseline and progressive.
///
/// Returns `Some((width, height))`, or `None` when no complete frame header
/// is present (including empty or truncated input).
pub(crate) fn jpeg_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    let mut i = 0;
    while i + 1 < data.len() {
        if data[i] != 0xFF {
            i += 1;
            continue;
        }
        match data[i + 1] {
            // Fill byte: markers may be preceded by any number of 0xFF bytes,
            // so step one byte and let the next 0xFF start the marker.
            0xFF => i += 1,
            // SOFn frame headers. 0xC4 (DHT), 0xC8 (JPG) and 0xCC (DAC) share
            // the range but are not frame headers.
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF => {
                // Payload layout: length(2) precision(1) height(2) width(2).
                let seg = data.get(i + 2..i + 9)?;
                let h = u32::from(u16::from_be_bytes([seg[3], seg[4]]));
                let w = u32::from(u16::from_be_bytes([seg[5], seg[6]]));
                return Some((w, h));
            }
            // No length field follows: stuffed 0x00, low/reserved codes,
            // RST0–RST7, SOI and EOI. Just step over the two marker bytes.
            0x00..=0xBF | 0xD0..=0xD9 => i += 2,
            // Every other marker carries a big-endian length that counts the
            // two length bytes themselves; jump past the whole segment.
            _ => {
                let len_bytes = data.get(i + 2..i + 4)?;
                let len = usize::from(u16::from_be_bytes([len_bytes[0], len_bytes[1]]));
                i += 2 + len;
            }
        }
    }
    None
}

/// Execute a PowerShell snippet after loading `System.Windows.Forms` and
/// `System.Drawing`.
///
/// Used for Windows computer_use actions (mouse, keyboard). The two
/// `Add-Type` statements are prepended to `script`, and the result is run via
/// `powershell -NoProfile -Command`.
///
/// # Errors
///
/// Returns whatever `run_subprocess` reports (spawn failure or a
/// non-successful exit).
pub(crate) async fn run_powershell_input(script: &str) -> Result<()> {
    let mut full = String::from(
        "Add-Type -AssemblyName System.Windows.Forms; Add-Type -AssemblyName System.Drawing; ",
    );
    full.push_str(script);

    let argv = ["-NoProfile", "-Command", full.as_str()];
    run_subprocess("powershell", &argv).await
}

/// Windows: click at `(x, y)` through a P/Invoke helper compiled by PowerShell.
///
/// `button` selects the `mouse_event` down/up flag pair: `"right"` and
/// `"middle"` are recognised, anything else means the left button. The click
/// is repeated `clicks` times with a 50 ms pause between repetitions.
///
/// Still used by `triple_click` in `tools_computer.rs` (the rest of the
/// click/move pipeline now goes through enigo via `NativeOperator`).
pub(crate) async fn win_mouse_click(x: i64, y: i64, button: &str, clicks: i32) -> Result<()> {
    // C# helper wrapped in a PowerShell here-string; it contains no
    // placeholders, so it lives outside the format string.
    const CLICK_HELPER_SOURCE: &str = r#"Add-Type @"
using System;
using System.Runtime.InteropServices;
public class WinClick {
    [DllImport("user32.dll")] public static extern bool SetCursorPos(int x, int y);
    [DllImport("user32.dll")] static extern void mouse_event(uint f, uint dx, uint dy, uint d, int e);
    public static void Click(int x, int y, uint down, uint up, int n) {
        SetCursorPos(x, y);
        for (int i = 0; i < n; i++) {
            mouse_event(down, 0, 0, 0, 0);
            mouse_event(up, 0, 0, 0, 0);
            if (i < n - 1) System.Threading.Thread.Sleep(50);
        }
    }
}
"@
"#;

    // (button-down flag, button-up flag) passed to mouse_event.
    let (down_flag, up_flag) = if button == "right" {
        ("0x0008", "0x0010")
    } else if button == "middle" {
        ("0x0020", "0x0040")
    } else {
        ("0x0002", "0x0004")
    };

    let script = format!(
        "{CLICK_HELPER_SOURCE}[WinClick]::Click({x}, {y}, {down_flag}, {up_flag}, {clicks})"
    );
    run_powershell_input(&script).await
}

/// Match user text against installed skills by keyword overlap.
#[allow(dead_code)]
pub(crate) fn match_skills<'a>(
    text: &str,
    skills: &'a rsclaw_skill::SkillRegistry,
) -> Vec<&'a rsclaw_skill::SkillManifest> {
    if text.trim().is_empty() {
        return Vec::new();
    }
    let lower = text.to_lowercase();
    let mut matched = Vec::new();

    for skill in skills.all() {
        // Skip tool-based skills.
        if !skill.tools.is_empty() {
            continue;
        }
        // Skip skills with no prompt body.
        if skill.prompt.trim().is_empty() {
            continue;
        }

        // Build keyword set from skill name + description.
        let mut keywords: Vec<&str> = Vec::new();

        for part in skill.name.split(|c: char| c == '-' || c == '_' || c == ' ') {
            let p = part.trim();
            if p.len() >= 2 {
                keywords.push(p);
            }
        }

        if let Some(ref desc) = skill.description {
            for word in desc.split(|c: char| !c.is_alphanumeric() && c != '/' && c != '.') {
                let w = word.trim();
                if w.len() >= 2 {
                    keywords.push(w);
                }
            }
        }

        let hit = keywords.iter().any(|kw| {
            let kl = kw.to_lowercase();
            if matches!(
                kl.as_str(),
                "the"
                    | "and"
                    | "for"
                    | "with"
                    | "use"
                    | "when"
                    | "from"
                    | "create"
                    | "edit"
                    | "file"
                    | "files"
                    | "data"
                    | "tool"
                    | "agent"
                    | ""
                    | ""
                    | ""
                    | ""
                    | ""
                    | ""
            ) {
                return false;
            }
            lower.contains(&kl)
        });

        if hit {
            matched.push(skill);
        }
    }

    matched
}

/// Build a `tokio::process::Command` for `powershell` that does not show a
/// console window.
///
/// Every platform gets `-NoProfile`. On Windows the command additionally
/// carries the CREATE_NO_WINDOW creation flag and `-WindowStyle Hidden`.
pub(crate) fn powershell_hidden() -> tokio::process::Command {
    let mut ps = tokio::process::Command::new("powershell");
    ps.arg("-NoProfile");
    #[cfg(target_os = "windows")]
    {
        use std::os::windows::process::CommandExt;
        ps.creation_flags(0x08000000); // CREATE_NO_WINDOW
        ps.arg("-WindowStyle").arg("Hidden");
    }
    ps
}

/// Detected PowerShell major edition.
///
/// `Desktop` = Windows PowerShell 5.1 (legacy, .NET Framework, shipped with
/// Windows). `Core` = PowerShell 7+ (cross-platform, .NET Core). Their
/// language surfaces diverge sharply (`&&`, `??`, ternary, `2>&1` semantics,
/// default file encoding) so we steer the model with different prompts.
///
/// The edition is derived from the reported major version number: a major
/// version of 6 or higher is classified as `Core`, anything lower as
/// `Desktop`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PowerShellEdition {
    /// Windows PowerShell 5.1 (reported major version below 6).
    Desktop,
    /// PowerShell 7+ (pwsh; reported major version 6 or higher).
    Core,
}

/// Report which PowerShell edition is installed, probing at most once per
/// process.
///
/// The first call runs `probe_powershell_edition`, which prefers `pwsh` over
/// `powershell` on `PATH` and asks the chosen binary for
/// `$PSVersionTable.PSVersion.Major`. Later calls return the cached answer,
/// including a cached `None`.
///
/// Returns `None` if no PowerShell binary is found or the probe fails.
pub(crate) fn detect_powershell_edition() -> Option<PowerShellEdition> {
    static EDITION: std::sync::OnceLock<Option<PowerShellEdition>> = std::sync::OnceLock::new();
    let cached = EDITION.get_or_init(probe_powershell_edition);
    *cached
}

/// Run the uncached PowerShell edition probe.
///
/// Picks the first of `pwsh`, `powershell` present on `PATH`, runs it with
/// `-NoProfile -Command $PSVersionTable.PSVersion.Major`, and classifies the
/// printed integer: 6 and above is `Core`, anything lower is `Desktop`.
///
/// Returns `None` when neither binary is on `PATH`, the process cannot be
/// run, it exits unsuccessfully, or its output is not an unsigned integer.
fn probe_powershell_edition() -> Option<PowerShellEdition> {
    // `pwsh` (7+) wins when present; `powershell` is typically 5.1 on
    // Windows, or possibly 7+ on PATH on Linux/macOS.
    let bin = ["pwsh", "powershell"]
        .iter()
        .copied()
        .find(|candidate| which_in_path(candidate))?;

    let mut probe = std::process::Command::new(bin);
    probe.args(["-NoProfile", "-Command", "$PSVersionTable.PSVersion.Major"]);
    probe.stdout(std::process::Stdio::piped());
    probe.stderr(std::process::Stdio::null());
    #[cfg(windows)]
    {
        use std::os::windows::process::CommandExt;
        probe.creation_flags(0x08000000);
    }

    let finished = probe.output().ok().filter(|out| out.status.success())?;
    let printed = String::from_utf8_lossy(&finished.stdout);
    let major = printed.trim().parse::<u32>().ok()?;

    Some(match major {
        0..=5 => PowerShellEdition::Desktop,
        _ => PowerShellEdition::Core,
    })
}

/// Minimal `PATH` search that avoids depending on the `which` crate.
///
/// Returns `true` when some directory listed in `PATH` contains a regular
/// file named `bin`. On Windows the name is also tried with `.exe`, `.cmd`
/// and `.bat` appended before the bare name. An unset `PATH` yields `false`.
fn which_in_path(bin: &str) -> bool {
    let suffixes: &[&str] = if cfg!(target_os = "windows") {
        &[".exe", ".cmd", ".bat", ""]
    } else {
        &[""]
    };

    std::env::var_os("PATH").is_some_and(|search_path| {
        std::env::split_paths(&search_path).any(|dir| {
            suffixes
                .iter()
                .any(|suffix| dir.join(format!("{bin}{suffix}")).is_file())
        })
    })
}