use std::{process::Command, time::Duration};
use anyhow::{Context as _, Result, anyhow};
use enigo::{
Axis, Button, Coordinate,
Direction::{Click, Press, Release},
Enigo, Key, Keyboard, Mouse, Settings,
};
use image::{ImageFormat, RgbaImage};
use tracing::{debug, warn};
use xcap::Monitor;
use crate::computer::{
action::{Action, ActionSpec, ExecCtx, MouseButton, Screenshot, ScrollDir},
operator::{ActionFut, ActionOutput, Operator, ScreenshotFut},
};
/// Pause, in milliseconds, applied after the macOS `open -a` fallback succeeds
/// and before activation is reported as successful.
// NOTE(review): presumably this gives the launched app time to come forward — confirm.
const ACTIVATE_DELAY_MS: u64 = 400;
/// Stateless operator handle; it carries no fields, so every instance is
/// interchangeable.
pub struct NativeOperator;

impl NativeOperator {
    /// Creates a new operator handle.
    pub fn new() -> Self {
        NativeOperator
    }
}

impl Default for NativeOperator {
    /// Equivalent to [`NativeOperator::new`].
    fn default() -> Self {
        NativeOperator
    }
}
impl Operator for NativeOperator {
    /// Identifier under which this operator is reported.
    fn name(&self) -> &'static str {
        "native"
    }

    /// Action signatures (with optional usage notes) advertised by this operator.
    fn action_spaces(&self) -> Vec<ActionSpec> {
        vec![
            ActionSpec::new("click(start_box='<box>x1,y1</box>')"),
            ActionSpec::new("left_double(start_box='<box>x1,y1</box>')"),
            ActionSpec::new("right_single(start_box='<box>x1,y1</box>')"),
            ActionSpec::new("drag(start_box='<box>x1,y1</box>', end_box='<box>x3,y3</box>')"),
            ActionSpec::with_note("hotkey(key='')", "# Lowercase, space-separated, max 4 keys"),
            ActionSpec::with_note("type(content='')", "# Add \\n at end of content to submit"),
            ActionSpec::new(
                "scroll(start_box='<box>x1,y1</box>', direction='down or up or right or left')",
            ),
            ActionSpec::with_note(
                "wait()",
                "# Default sleep 1s and re-screenshot. Pass wait(seconds=5) for slow page loads (max 60).",
            ),
            ActionSpec::with_note(
                "activate_app(app='AppName')",
                "# Bring an app forward when it's not visible. Use BEFORE clicking when the target app isn't on screen.",
            ),
            ActionSpec::new("finished(content='xxx')"),
            ActionSpec::with_note(
                "call_user()",
                "# Submit and call user when stuck or need help",
            ),
        ]
    }

    /// Captures the primary screen on a blocking worker thread.
    ///
    /// # Errors
    /// Fails when the blocking task cannot be joined or when the capture
    /// itself fails.
    fn screenshot(&self) -> ScreenshotFut<'_> {
        Box::pin(async move {
            tokio::task::spawn_blocking(capture_primary_screen)
                .await
                .context("screenshot blocking task join failed")?
        })
    }

    /// Executes one action.
    ///
    /// `Wait`, `Finished`, `CallUser` and `ActivateApp` are handled directly
    /// in the async context; every other action is forwarded to
    /// `execute_blocking` on a blocking worker thread.
    ///
    /// # Errors
    /// Fails when a blocking task cannot be joined, or when the forwarded
    /// action returns an error.
    fn execute<'a>(&'a self, action: &'a Action, ctx: &'a ExecCtx) -> ActionFut<'a> {
        Box::pin(async move {
            // Owned copies so the blocking closure below does not borrow from the caller.
            let action = action.clone();
            let ctx = *ctx;
            match &action {
                Action::Wait { seconds } => {
                    // `clamp` passes NaN through unchanged and
                    // `Duration::from_secs_f32` panics on NaN, so a NaN
                    // request is treated as "no wait".
                    let s = if seconds.is_nan() {
                        0.0
                    } else {
                        seconds.clamp(0.0, 60.0)
                    };
                    tokio::time::sleep(Duration::from_secs_f32(s)).await;
                    return Ok(ActionOutput::ok());
                }
                Action::Finished { content } => {
                    debug!(content = %content, "native operator: finished");
                    return Ok(ActionOutput::ok());
                }
                Action::CallUser { reason } => {
                    debug!(reason = %reason, "native operator: call_user");
                    return Ok(ActionOutput::ok());
                }
                Action::ActivateApp { app } => {
                    return activate_app(app).await;
                }
                _ => {}
            }
            tokio::task::spawn_blocking(move || execute_blocking(&action, &ctx))
                .await
                .context("execute blocking task join failed")?
        })
    }
}
/// Captures the primary monitor (or the first monitor when none reports
/// itself as primary) and encodes the image as PNG.
///
/// The returned [`Screenshot`] carries both the captured pixel size and a
/// logical size obtained by dividing the pixel size by the monitor's scale
/// factor. A missing scale factor defaults to `1.0`; a non-positive one leaves
/// the logical size equal to the pixel size.
///
/// # Errors
/// Fails when monitors cannot be enumerated, none are present, the capture
/// fails, or PNG encoding fails.
fn capture_primary_screen() -> Result<Screenshot> {
    let monitors = Monitor::all().map_err(|e| anyhow!("xcap::Monitor::all failed: {e}"))?;
    if monitors.is_empty() {
        return Err(anyhow!("no monitors detected"));
    }
    let monitor = monitors
        .iter()
        .find(|m| m.is_primary().unwrap_or(false))
        .unwrap_or(&monitors[0])
        .clone();
    let scale_factor = monitor.scale_factor().unwrap_or(1.0);
    let img: RgbaImage = monitor
        .capture_image()
        .map_err(|e| anyhow!("xcap capture_image failed: {e}"))?;
    let physical_w = img.width();
    let physical_h = img.height();

    // Converts one captured dimension to logical units; the guard avoids
    // dividing by a zero, negative or NaN scale factor.
    let to_logical = |physical: u32| -> u32 {
        if scale_factor > 0.0 {
            (physical as f32 / scale_factor).round() as u32
        } else {
            physical
        }
    };
    let logical_w = to_logical(physical_w);
    let logical_h = to_logical(physical_h);

    // Initial capacity is only a guess (a quarter of the pixel count); the
    // vector grows as needed during encoding.
    let mut png_bytes: Vec<u8> = Vec::with_capacity(physical_w as usize * physical_h as usize / 4);
    {
        let mut cursor = std::io::Cursor::new(&mut png_bytes);
        img.write_to(&mut cursor, ImageFormat::Png)
            .context("PNG encode failed")?;
    }
    Ok(Screenshot {
        png_bytes,
        logical_size: (logical_w, logical_h),
        physical_size: (physical_w, physical_h),
        scale_factor,
    })
}
/// Performs one synchronous input action through a freshly constructed
/// `Enigo` instance.
///
/// Input failures are reported as `Ok(ActionOutput::err(..))` rather than as
/// `Err`, so the caller receives the message as an action result. The only
/// `Err` path is a failed capture for `Action::Screenshot`.
///
/// Coordinates are passed through `scale_for_input` with `ctx.scale_factor`
/// before being handed to Enigo.
fn execute_blocking(action: &Action, ctx: &ExecCtx) -> Result<ActionOutput> {
    let mut enigo = match Enigo::new(&Settings::default()) {
        Ok(e) => e,
        Err(e) => {
            let err_str = e.to_string();
            // On macOS a permission-looking failure gets a remediation hint appended.
            let hint = if cfg!(target_os = "macos")
                && (err_str.contains("permission") || err_str.contains("simulate input"))
            {
                " (macOS: open System Settings -> Privacy & Security -> \
                 Accessibility AND Input Monitoring, then add the \
                 currently-running rsclaw binary at \
                 target/debug/rsclaw or target/release/rsclaw. \
                 Restart the gateway after granting.)"
            } else {
                ""
            };
            warn!(error = %e, hint, "failed to construct Enigo");
            return Ok(ActionOutput::err(format!("enigo init failed: {e}{hint}")));
        }
    };
    match action {
        Action::MouseMove { x, y } => {
            let (lx, ly) = scale_for_input(*x, *y, ctx.scale_factor);
            try_input(enigo.move_mouse(lx, ly, Coordinate::Abs), "move_mouse")
        }
        Action::Click { x, y, button } => {
            let (lx, ly) = scale_for_input(*x, *y, ctx.scale_factor);
            if let Err(msg) = ok_or_msg(enigo.move_mouse(lx, ly, Coordinate::Abs), "move_mouse") {
                return Ok(ActionOutput::err(msg));
            }
            let btn = map_button(*button);
            try_input(enigo.button(btn, Click), "button")
        }
        Action::DoubleClick { x, y } => {
            let (lx, ly) = scale_for_input(*x, *y, ctx.scale_factor);
            if let Err(msg) = ok_or_msg(enigo.move_mouse(lx, ly, Coordinate::Abs), "move_mouse") {
                return Ok(ActionOutput::err(msg));
            }
            if let Err(msg) = ok_or_msg(enigo.button(Button::Left, Click), "button") {
                return Ok(ActionOutput::err(msg));
            }
            try_input(enigo.button(Button::Left, Click), "button")
        }
        Action::Drag {
            from_x,
            from_y,
            to_x,
            to_y,
        } => {
            let (fx, fy) = scale_for_input(*from_x, *from_y, ctx.scale_factor);
            let (tx, ty) = scale_for_input(*to_x, *to_y, ctx.scale_factor);
            if let Err(msg) = ok_or_msg(enigo.move_mouse(fx, fy, Coordinate::Abs), "move_mouse") {
                return Ok(ActionOutput::err(msg));
            }
            if let Err(msg) = ok_or_msg(enigo.button(Button::Left, Press), "button press") {
                return Ok(ActionOutput::err(msg));
            }
            if let Err(msg) = ok_or_msg(enigo.move_mouse(tx, ty, Coordinate::Abs), "drag move") {
                // Best-effort release so the button is not left held down.
                let _ = enigo.button(Button::Left, Release);
                return Ok(ActionOutput::err(msg));
            }
            try_input(enigo.button(Button::Left, Release), "button release")
        }
        Action::Scroll {
            x,
            y,
            direction,
            clicks,
        } => {
            let (lx, ly) = scale_for_input(*x, *y, ctx.scale_factor);
            if let Err(msg) = ok_or_msg(enigo.move_mouse(lx, ly, Coordinate::Abs), "move_mouse") {
                return Ok(ActionOutput::err(msg));
            }
            let (axis, length) = scroll_amount(*direction, *clicks);
            try_input(enigo.scroll(length, axis), "scroll")
        }
        Action::Type { text } => type_text(&mut enigo, text),
        Action::Hotkey { keys } => press_hotkey(&mut enigo, keys),
        Action::HoldKey { key, seconds } => {
            let Some(k) = parse_key(key) else {
                return Ok(ActionOutput::err(format!("unknown key: {key}")));
            };
            // `clamp` passes NaN through unchanged and
            // `Duration::from_secs_f32` panics on NaN. That panic would fire
            // after the key press, leaving the key held, so NaN becomes a
            // zero-length hold instead.
            let s = if seconds.is_nan() {
                0.0
            } else {
                seconds.clamp(0.0, 60.0)
            };
            if let Err(msg) = ok_or_msg(enigo.key(k, Press), "key press") {
                return Ok(ActionOutput::err(msg));
            }
            std::thread::sleep(Duration::from_secs_f32(s));
            try_input(enigo.key(k, Release), "key release")
        }
        Action::Screenshot => {
            // The capture result is discarded; only success or failure is reported.
            let _ = capture_primary_screen()?;
            Ok(ActionOutput::ok())
        }
        // These variants need no input device work here; the arm keeps the
        // match exhaustive.
        Action::Wait { .. }
        | Action::Finished { .. }
        | Action::CallUser { .. }
        | Action::ActivateApp { .. } => Ok(ActionOutput::ok()),
    }
}
/// Maps a coordinate pair to the values handed to the input backend.
///
/// On macOS, when `scale_factor` is positive and differs from `1.0`, each
/// coordinate is divided by it and rounded. In every other case (other
/// platforms, a zero, negative or NaN factor, or a factor of `1.0`) the
/// coordinates are returned unchanged.
fn scale_for_input(x: i32, y: i32, scale_factor: f32) -> (i32, i32) {
    let needs_scaling = cfg!(target_os = "macos")
        && scale_factor > 0.0
        && (scale_factor - 1.0).abs() > f32::EPSILON;
    if !needs_scaling {
        return (x, y);
    }
    let shrink = |v: i32| (v as f32 / scale_factor).round() as i32;
    (shrink(x), shrink(y))
}
fn map_button(b: MouseButton) -> Button {
match b {
MouseButton::Left => Button::Left,
MouseButton::Right => Button::Right,
MouseButton::Middle => Button::Middle,
}
}
/// Converts a scroll direction and click count into the axis and signed
/// length passed to Enigo.
///
/// `Up` and `Left` negate `clicks`; `Down` and `Right` pass it through. The
/// negation saturates, so `i32::MIN` maps to `i32::MAX` instead of overflowing
/// (a panic in debug builds, a wrap back to `i32::MIN` in release builds).
fn scroll_amount(dir: ScrollDir, clicks: i32) -> (Axis, i32) {
    match dir {
        ScrollDir::Up => (Axis::Vertical, clicks.saturating_neg()),
        ScrollDir::Down => (Axis::Vertical, clicks),
        ScrollDir::Left => (Axis::Horizontal, clicks.saturating_neg()),
        ScrollDir::Right => (Axis::Horizontal, clicks),
    }
}
/// Expands backslash escape sequences that appear literally in `s`.
///
/// Supported escapes: `\n`, `\t`, `\r`, `\\`, `\'`, `\"`, `\0`, `\xHH`
/// (two hex digits) and `\uXXXX` (four hex digits). A `\uXXXX` high surrogate
/// immediately followed by a `\uXXXX` low surrogate is combined into a single
/// code point, so JSON-style escaped emoji such as `\ud83d\ude00` decode to
/// the intended character.
///
/// Anything else — an unknown escape, a malformed `\x`/`\u`, an unpaired
/// surrogate, or a trailing lone backslash — is copied through unchanged.
fn decode_string_escapes(s: &str) -> String {
    // Parses a run of hex digits; `None` if any char is not an ASCII hex digit.
    fn hex_value(digits: &[char]) -> Option<u32> {
        digits
            .iter()
            .try_fold(0u32, |acc, d| Some((acc << 4) | d.to_digit(16)?))
    }

    // Decodes the `\uXXXX` escape (or surrogate pair) at the start of `rest`,
    // returning the character and the number of input chars it consumed.
    fn decode_u_escape(rest: &[char]) -> Option<(char, usize)> {
        let first = hex_value(rest.get(2..6)?)?;
        if let Some(ch) = char::from_u32(first) {
            return Some((ch, 6));
        }
        // `first` is a surrogate; only a high surrogate followed by an
        // escaped low surrogate forms a valid character.
        if !(0xD800..=0xDBFF).contains(&first) {
            return None;
        }
        if rest.get(6) != Some(&'\\') || rest.get(7) != Some(&'u') {
            return None;
        }
        let second = hex_value(rest.get(8..12)?)?;
        if !(0xDC00..=0xDFFF).contains(&second) {
            return None;
        }
        let code = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
        char::from_u32(code).map(|ch| (ch, 12))
    }

    let cv: Vec<char> = s.chars().collect();
    let mut out = String::with_capacity(s.len());
    let mut i = 0;
    while i < cv.len() {
        let c = cv[i];
        // Ordinary characters, and a backslash with nothing after it, pass through.
        if c != '\\' || i + 1 >= cv.len() {
            out.push(c);
            i += 1;
            continue;
        }
        let next = cv[i + 1];
        // (decoded character, number of input chars consumed including the backslash)
        let decoded: Option<(char, usize)> = match next {
            'n' => Some(('\n', 2)),
            't' => Some(('\t', 2)),
            'r' => Some(('\r', 2)),
            '\\' => Some(('\\', 2)),
            '\'' => Some(('\'', 2)),
            '"' => Some(('"', 2)),
            '0' => Some(('\0', 2)),
            'x' => cv
                .get(i + 2..i + 4)
                .and_then(hex_value)
                .and_then(char::from_u32)
                .map(|ch| (ch, 4)),
            'u' => decode_u_escape(&cv[i..]),
            _ => None,
        };
        match decoded {
            Some((ch, consumed)) => {
                out.push(ch);
                i += consumed;
            }
            None => {
                // Not a recognised escape: keep the backslash and the
                // following char verbatim and carry on after them.
                out.push('\\');
                out.push(next);
                i += 2;
            }
        }
    }
    out
}
/// Types `text` after expanding its escape sequences.
///
/// When the decoded text ends with a newline, that one trailing newline is
/// not typed; a Return key click is sent instead once the remaining text (if
/// any) has been entered. Input failures are returned as
/// `Ok(ActionOutput::err(..))`.
fn type_text(enigo: &mut Enigo, text: &str) -> Result<ActionOutput> {
    let decoded = decode_string_escapes(text);
    let (body, submit) = match decoded.strip_suffix('\n') {
        Some(without_newline) => (without_newline, true),
        None => (decoded.as_str(), false),
    };

    if !body.is_empty() {
        if let Err(msg) = ok_or_msg(enigo.text(body), "type text") {
            return Ok(ActionOutput::err(msg));
        }
    }

    if submit {
        try_input(enigo.key(Key::Return, Click), "submit return")
    } else {
        Ok(ActionOutput::ok())
    }
}
/// Presses a key chord described by `keys` and then releases it.
///
/// `keys` is split on whitespace and `+`; between one and four key names are
/// accepted. Keys are pressed in the order given and released in reverse.
/// If a press fails, the keys already held are released (best effort) and the
/// press error is reported. If releases fail, the message from the last
/// failing release is reported. All failures are returned as
/// `Ok(ActionOutput::err(..))`.
fn press_hotkey(enigo: &mut Enigo, keys: &str) -> Result<ActionOutput> {
    let names: Vec<&str> = keys
        .split(|c: char| c == '+' || c.is_whitespace())
        .filter(|part| !part.is_empty())
        .collect();
    match names.len() {
        0 => return Ok(ActionOutput::err("empty hotkey")),
        count if count > 4 => {
            return Ok(ActionOutput::err(format!(
                "hotkey too long ({} keys, max 4)",
                count
            )));
        }
        _ => {}
    }

    // Resolve every name before touching the keyboard, so an unknown key
    // never leaves a partial chord held.
    let mut chord: Vec<Key> = Vec::with_capacity(names.len());
    for name in &names {
        let Some(key) = parse_key(name) else {
            return Ok(ActionOutput::err(format!("unknown key: {name}")));
        };
        chord.push(key);
    }

    for (idx, key) in chord.iter().enumerate() {
        if let Err(msg) = ok_or_msg(enigo.key(*key, Press), "hotkey press") {
            // Undo the presses that did succeed, most recent first.
            for held in chord[..idx].iter().rev() {
                let _ = enigo.key(*held, Release);
            }
            return Ok(ActionOutput::err(msg));
        }
    }

    // Attempt every release even if one fails; remember the latest failure.
    let mut release_failure: Option<String> = None;
    for key in chord.iter().rev() {
        if let Err(msg) = ok_or_msg(enigo.key(*key, Release), "hotkey release") {
            release_failure = Some(msg);
        }
    }
    match release_failure {
        None => Ok(ActionOutput::ok()),
        Some(msg) => Ok(ActionOutput::err(msg)),
    }
}
/// Resolves a key name to an Enigo [`Key`].
///
/// The name is trimmed and lowercased, then matched against the known aliases
/// (modifiers, navigation keys, `f1`–`f12`, ...). A name that is exactly one
/// character after normalisation maps to `Key::Unicode`. Anything else yields
/// `None`.
fn parse_key(raw: &str) -> Option<Key> {
    let normalized = raw.trim().to_lowercase();
    let named = match normalized.as_str() {
        "return" | "enter" => Key::Return,
        "ctrl" | "control" => Key::Control,
        "shift" => Key::Shift,
        "alt" | "option" => Key::Alt,
        "cmd" | "command" | "meta" | "win" | "super" => Key::Meta,
        "tab" => Key::Tab,
        "escape" | "esc" => Key::Escape,
        "space" | "spacebar" => Key::Space,
        "backspace" => Key::Backspace,
        "delete" | "del" => Key::Delete,
        "up" | "arrowup" | "uparrow" => Key::UpArrow,
        "down" | "arrowdown" | "downarrow" => Key::DownArrow,
        "left" | "arrowleft" | "leftarrow" => Key::LeftArrow,
        "right" | "arrowright" | "rightarrow" => Key::RightArrow,
        "pageup" | "pgup" => Key::PageUp,
        "pagedown" | "pgdn" => Key::PageDown,
        "home" => Key::Home,
        "end" => Key::End,
        "capslock" => Key::CapsLock,
        "f1" => Key::F1,
        "f2" => Key::F2,
        "f3" => Key::F3,
        "f4" => Key::F4,
        "f5" => Key::F5,
        "f6" => Key::F6,
        "f7" => Key::F7,
        "f8" => Key::F8,
        "f9" => Key::F9,
        "f10" => Key::F10,
        "f11" => Key::F11,
        "f12" => Key::F12,
        other => {
            // Exactly one character means a literal key; empty or longer
            // unknown names are rejected.
            let mut chars = other.chars();
            return match (chars.next(), chars.next()) {
                (Some(only), None) => Some(Key::Unicode(only)),
                _ => None,
            };
        }
    };
    Some(named)
}
/// Converts an input result into `Result<(), String>`, prefixing any error
/// with the operation name as `"{op}: {error}"`.
fn ok_or_msg<E: std::fmt::Display>(
    res: std::result::Result<(), E>,
    op: &'static str,
) -> std::result::Result<(), String> {
    match res {
        Ok(()) => Ok(()),
        Err(cause) => Err(format!("{op}: {cause}")),
    }
}
fn try_input<E: std::fmt::Display>(
res: std::result::Result<(), E>,
op: &'static str,
) -> Result<ActionOutput> {
match res {
Ok(()) => Ok(ActionOutput::ok()),
Err(e) => {
warn!(operation = op, error = %e, "enigo input error");
Ok(ActionOutput::err(format!("{op}: {e}")))
}
}
}
/// Runs `activate_app_blocking` for `app` on a blocking worker thread.
///
/// # Errors
/// Fails only when the blocking task cannot be joined; activation failures
/// are carried inside the returned `ActionOutput`.
async fn activate_app(app: &str) -> Result<ActionOutput> {
    // The closure must own its data to be moved onto the worker thread.
    let owned = String::from(app);
    tokio::task::spawn_blocking(move || activate_app_blocking(&owned))
        .await
        .context("activate_app join failed")
}
/// Brings `app` to the foreground using the current platform's mechanism.
///
/// On macOS each candidate name from `macos_app_candidates` is tried in
/// order; the first success wins, otherwise all failures are joined into one
/// error message. Other platforms delegate to
/// `activate_app_blocking_non_macos`.
fn activate_app_blocking(app: &str) -> ActionOutput {
    #[cfg(target_os = "macos")]
    {
        let mut failures: Vec<String> = Vec::new();
        for name in macos_app_candidates(app) {
            if let Err(why) = activate_macos_app(&name) {
                failures.push(format!("{name}: {why}"));
            } else {
                return ActionOutput::ok();
            }
        }
        return ActionOutput::err(format!("activate_app failed: {}", failures.join("; ")));
    }
    #[cfg(not(target_os = "macos"))]
    activate_app_blocking_non_macos(app)
}
/// Brings `app` to the foreground on Windows or Linux.
///
/// * Windows: runs a PowerShell script that calls `SetForegroundWindow` on
///   every process whose name matches `*app*` and that has a main window.
/// * Linux: tries `wmctrl -a app`, then falls back to
///   `xdotool search --class app windowactivate`.
/// * Any other platform: reports an "unsupported platform" error.
///
/// An empty or whitespace-only `app` is rejected up front. Otherwise it would
/// become the wildcard `'**'` on Windows (matching every process) and an
/// empty pattern for `wmctrl` on Linux.
#[cfg(not(target_os = "macos"))]
fn activate_app_blocking_non_macos(app: &str) -> ActionOutput {
    if app.trim().is_empty() {
        return ActionOutput::err("activate_app: empty app name");
    }
    if cfg!(target_os = "windows") {
        // Backtick-escape `-like` wildcard metacharacters and double single
        // quotes, because the name is embedded in a single-quoted pattern.
        let escaped = app
            .replace('`', "``")
            .replace('*', "`*")
            .replace('?', "`?")
            .replace('[', "`[")
            .replace(']', "`]")
            .replace('\'', "''");
        let ps = format!(
            r#"Add-Type -Name W -Namespace N -MemberDefinition '[DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr hWnd);'; Get-Process | Where-Object {{$_.ProcessName -like '*{}*'}} | ForEach-Object {{ if ($_.MainWindowHandle -ne 0) {{ [N.W]::SetForegroundWindow($_.MainWindowHandle) }} }}"#,
            escaped
        );
        // `mut` is only needed when the `cfg(windows)` block below is compiled in.
        #[allow(unused_mut)]
        let mut ps_cmd = Command::new("powershell");
        ps_cmd.args(["-NoProfile", "-Command", ps.as_str()]);
        #[cfg(windows)]
        {
            use std::os::windows::process::CommandExt;
            // 0x08000000 is the Win32 CREATE_NO_WINDOW process creation flag.
            ps_cmd.creation_flags(0x08000000);
        }
        match ps_cmd.output() {
            Ok(out) if out.status.success() => ActionOutput::ok(),
            Ok(out) => {
                let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
                ActionOutput::err(format!("powershell failed: {stderr}"))
            }
            Err(e) => ActionOutput::err(format!("powershell spawn failed: {e}")),
        }
    } else if cfg!(target_os = "linux") {
        let wmctrl = Command::new("wmctrl").args(["-a", app]).status();
        if matches!(&wmctrl, Ok(s) if s.success()) {
            return ActionOutput::ok();
        }
        // wmctrl was missing or found nothing; fall back to xdotool.
        let xdotool = Command::new("xdotool")
            .args(["search", "--class", app, "windowactivate"])
            .status();
        match xdotool {
            Ok(s) if s.success() => ActionOutput::ok(),
            Ok(s) => ActionOutput::err(format!("xdotool exit status: {s}")),
            Err(e) => ActionOutput::err(format!("neither wmctrl nor xdotool worked: {e}")),
        }
    } else {
        ActionOutput::err("activate_app: unsupported platform")
    }
}
/// Builds the ordered list of application names to try for `app` on macOS.
///
/// The trimmed input comes first (when non-empty), followed by known aliases:
/// `wechat`/`weixin` add `WeChat` and `微信`; `douyin`/`tiktok` add `抖音`.
/// Alias lookup ignores ASCII case. Adjacent duplicates are removed.
#[cfg(target_os = "macos")]
fn macos_app_candidates(app: &str) -> Vec<String> {
    let name = app.trim();
    let aliases: &[&str] = match name.to_ascii_lowercase().as_str() {
        "wechat" | "weixin" => &["WeChat", "微信"],
        "douyin" | "tiktok" => &["抖音"],
        _ => &[],
    };

    let mut candidates: Vec<String> = Vec::with_capacity(1 + aliases.len());
    if !name.is_empty() {
        candidates.push(name.to_owned());
    }
    candidates.extend(aliases.iter().map(|alias| (*alias).to_owned()));
    candidates.dedup();
    candidates
}
/// Activates one macOS application by name.
///
/// First runs an AppleScript (via `osascript`) that activates the app and,
/// through System Events, marks its process frontmost and raises its first
/// window. If the script exits unsuccessfully, falls back to `open -a` and,
/// on success, sleeps for `ACTIVATE_DELAY_MS` before returning.
///
/// # Errors
/// Returns a message when `osascript` cannot be spawned, or when both the
/// script and the `open -a` fallback fail.
#[cfg(target_os = "macos")]
fn activate_macos_app(app: &str) -> std::result::Result<(), String> {
    // Escape backslashes and double quotes so the name can sit inside an
    // AppleScript string literal.
    let escaped = app.replace('\\', "\\\\").replace('"', "\\\"");
    let script = format!(
        r#"tell application "{escaped}" to activate
delay 0.4
tell application "System Events"
if exists process "{escaped}" then
tell process "{escaped}"
set frontmost to true
if (count of windows) > 0 then
try
perform action "AXRaise" of window 1
end try
end if
end tell
end if
end tell"#
    );

    let scripted = Command::new("osascript")
        .arg("-e")
        .arg(&script)
        .output()
        .map_err(|e| format!("osascript spawn failed: {e}"))?;
    if scripted.status.success() {
        return Ok(());
    }

    // The script failed; keep its stderr for the combined error message and
    // try launching through `open -a` instead.
    let stderr = String::from_utf8_lossy(&scripted.stderr).trim().to_owned();
    match Command::new("open").args(["-a", app]).output() {
        Err(e) => Err(format!(
            "osascript failed: {stderr}; open -a spawn failed: {e}"
        )),
        Ok(opened) if !opened.status.success() => {
            let open_stderr = String::from_utf8_lossy(&opened.stderr).trim().to_owned();
            Err(format!(
                "osascript failed: {stderr}; open -a failed: {open_stderr}"
            ))
        }
        Ok(_) => {
            std::thread::sleep(Duration::from_millis(ACTIVATE_DELAY_MS));
            Ok(())
        }
    }
}
// Unit tests for the pure helpers in this file; none of them drive real
// input devices or capture the screen.
#[cfg(test)]
mod tests {
use super::*;
// Both construction paths must be available.
#[test]
fn default_constructs() {
let _op: NativeOperator = <NativeOperator as Default>::default();
let _op2 = NativeOperator::new();
}
// The operator reports itself under the name "native".
#[test]
fn name_is_native() {
let op = NativeOperator::new();
assert_eq!(op.name(), "native");
}
// The advertised action list must contain at least nine entries and
// include the core actions checked below.
#[test]
fn action_spaces_advertise_core_capabilities() {
let op = NativeOperator::new();
let specs = op.action_spaces();
assert!(
specs.len() >= 9,
"expected at least 9 action specs, got {}",
specs.len()
);
let sigs: Vec<&str> = specs.iter().map(|s| s.signature).collect();
for needle in ["click", "type", "hotkey", "scroll", "wait", "finished"] {
assert!(
sigs.iter().any(|s| s.contains(needle)),
"action_spaces missing `{needle}`: {:?}",
sigs
);
}
}
// Key names are case-insensitive; single characters map to Unicode keys;
// unknown multi-character names are rejected.
#[test]
fn parse_key_handles_modifiers_and_chars() {
assert!(matches!(parse_key("cmd"), Some(Key::Meta)));
assert!(matches!(parse_key("CTRL"), Some(Key::Control)));
assert!(matches!(parse_key("return"), Some(Key::Return)));
assert!(matches!(parse_key("a"), Some(Key::Unicode('a'))));
assert!(parse_key("not-a-real-key").is_none());
}
// Up/Left produce negative lengths, Down/Right positive ones.
#[test]
fn scroll_amount_signs() {
assert_eq!(scroll_amount(ScrollDir::Up, 3), (Axis::Vertical, -3));
assert_eq!(scroll_amount(ScrollDir::Down, 3), (Axis::Vertical, 3));
assert_eq!(scroll_amount(ScrollDir::Left, 2), (Axis::Horizontal, -2));
assert_eq!(scroll_amount(ScrollDir::Right, 2), (Axis::Horizontal, 2));
}
// Simple escapes are expanded; unknown escapes, a trailing backslash and
// non-ASCII text pass through unchanged.
#[test]
fn decode_string_escapes_handles_common_sequences() {
assert_eq!(decode_string_escapes(r"hi\n"), "hi\n");
assert_eq!(decode_string_escapes(r"tab\there"), "tab\there");
assert_eq!(decode_string_escapes(r"crlf\r\n"), "crlf\r\n");
assert_eq!(decode_string_escapes(r"quote\'mark"), "quote'mark");
assert_eq!(decode_string_escapes(r#"dquote\""#), "dquote\"");
assert_eq!(decode_string_escapes(r"slash\\path"), r"slash\path");
assert_eq!(decode_string_escapes(r"unknown\x"), r"unknown\x");
assert_eq!(decode_string_escapes(r"trail\"), r"trail\");
assert_eq!(
decode_string_escapes("正式版即将上线,敬请期待。"),
"正式版即将上线,敬请期待。"
);
}
// \xHH and \uXXXX are decoded; malformed sequences and a lone surrogate
// are left as literal text.
#[test]
fn decode_string_escapes_handles_hex_and_unicode() {
assert_eq!(decode_string_escapes(r"a\x41b"), "aAb");
assert_eq!(decode_string_escapes(r"\x00end"), "\0end");
// The backslash is spliced in via format! rather than written inline.
let cjk_input = format!("hi {}u4e2d!", '\\');
assert_eq!(
decode_string_escapes(&cjk_input),
format!("hi {}!", '\u{4e2d}')
);
let acc_input = format!("{}u00e9clair", '\\');
assert_eq!(
decode_string_escapes(&acc_input),
format!("{}clair", '\u{00e9}')
);
assert_eq!(decode_string_escapes(r"\xZZ"), r"\xZZ");
assert_eq!(decode_string_escapes(r"\u12"), r"\u12");
assert_eq!(decode_string_escapes(r"\ud800"), r"\ud800");
}
}