Skip to main content

codetether_agent/tool/computer_use/
input.rs

1//! Input types for the computer_use tool.
2
3#[derive(Copy, Clone, Debug, serde::Deserialize)]
4#[serde(rename_all = "snake_case")]
5pub enum ComputerUseAction {
6    /// Check OS support and permission status
7    Status,
8    /// List visible controllable apps/windows
9    ListApps,
10    /// Request approval for target app
11    RequestApp,
12    /// Capture screenshot/accessibility snapshot
13    Snapshot,
14    /// Screenshot a specific window by hwnd
15    WindowSnapshot,
16    /// Click on coordinates or target
17    Click,
18    /// Right-click on coordinates
19    RightClick,
20    /// Double-click on coordinates
21    DoubleClick,
22    /// Drag from one point to another
23    Drag,
24    /// Type text using native input
25    TypeText,
26    /// Press key or hotkey
27    PressKey,
28    /// Scroll native window
29    Scroll,
30    /// Bring a window to the foreground
31    BringToFront,
32    /// Wait N milliseconds for UI to settle
33    WaitMs,
34    /// Stop active control
35    Stop,
36}
37
38#[derive(Clone, Debug, serde::Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub struct ComputerUseInput {
41    pub action: ComputerUseAction,
42    #[serde(default)]
43    pub app: Option<String>,
44    #[serde(default)]
45    pub window_title_contains: Option<String>,
46    #[serde(default)]
47    pub text: Option<String>,
48    #[serde(default)]
49    pub key: Option<String>,
50    #[serde(default)]
51    pub scroll_amount: Option<i32>,
52    /// HWND of the target window (for bring_to_front, window_snapshot)
53    #[serde(default)]
54    pub hwnd: Option<i64>,
55    #[serde(default)]
56    pub x: Option<f64>,
57    #[serde(default)]
58    pub y: Option<f64>,
59    /// End X coordinate for drag
60    #[serde(default)]
61    pub x2: Option<f64>,
62    /// End Y coordinate for drag
63    #[serde(default)]
64    pub y2: Option<f64>,
65    /// Milliseconds to wait (for wait_ms)
66    #[serde(default)]
67    pub ms: Option<u64>,
68}