// mermaid_cli/providers/tool/computer_use/press_key.rs
use std::sync::Arc;
use std::time::Instant;

use async_trait::async_trait;
use serde_json::Value;

use crate::constants::POST_KEY_DELAY_MS;
use crate::domain::{ToolDefinition, ToolOutcome};
use crate::providers::ctx::{ExecContext, ProgressEvent};

use super::super::ToolExecutor;
use super::computer_use_success;
use super::driver::ComputerUseDriver;
20
21pub struct PressKeyTool {
22 driver: Arc<ComputerUseDriver>,
23}
24
25impl PressKeyTool {
26 pub fn new(driver: Arc<ComputerUseDriver>) -> Self {
27 Self { driver }
28 }
29}
30
31#[async_trait]
32impl ToolExecutor for PressKeyTool {
33 fn name(&self) -> &'static str {
34 "press_key"
35 }
36
37 fn schema(&self) -> ToolDefinition {
38 ToolDefinition {
39 name: "press_key".to_string(),
40 description: "Press a key or chord: 'Return', 'Escape', 'Tab', 'F5', 'ctrl+shift+t', \
41 'alt+Tab', etc. Uses xdotool/wtype naming. ALWAYS click the target \
42 window first. Auto-captures the focused window afterwards."
43 .to_string(),
44 input_schema: serde_json::json!({
45 "type": "object",
46 "properties": { "key": { "type": "string" } },
47 "required": ["key"]
48 }),
49 }
50 }
51
52 async fn execute(&self, args: Value, ctx: ExecContext) -> ToolOutcome {
53 let started = Instant::now();
54 if let Err(error) = self.driver.ensure_alive() {
55 return ToolOutcome::error(error, started.elapsed().as_secs_f64());
56 }
57 let key = match args.get("key").and_then(|v| v.as_str()) {
58 Some(s) => s.to_string(),
59 None => {
60 return ToolOutcome::error(
61 "press_key requires `key` string",
62 started.elapsed().as_secs_f64(),
63 );
64 },
65 };
66
67 let res = tokio::select! {
68 biased;
69 _ = ctx.token.cancelled() => return ToolOutcome::cancelled(),
70 r = self.driver.press_key(&key, &ctx.token) => r,
71 };
72 if let Err(e) = res {
73 return ToolOutcome::error(
74 format!("press_key failed: {}", e),
75 started.elapsed().as_secs_f64(),
76 );
77 }
78
79 tokio::time::sleep(std::time::Duration::from_millis(POST_KEY_DELAY_MS)).await;
80 let base_msg = format!("Pressed: {}", key);
81
82 let (summary, image) = match self.driver.capture_focused_for_autoshot(&ctx.token).await {
83 Some((s, b64)) => (Some(s), Some(b64)),
84 None => (None, None),
85 };
86
87 if let Some(b64) = &image
88 && let Ok(bytes) =
89 base64::Engine::decode(&base64::engine::general_purpose::STANDARD, b64)
90 {
91 let _ = ctx
92 .progress
93 .send(ProgressEvent::Artifact {
94 mime: "image/png".to_string(),
95 data: bytes,
96 caption: Some("press_key auto-screenshot".to_string()),
97 })
98 .await;
99 }
100
101 let out = match &summary {
102 Some(s) => format!("{}\n[auto-screenshot: {}]", base_msg, s),
103 None => base_msg,
104 };
105 let mut outcome =
106 computer_use_success("press_key", args, out, started.elapsed().as_secs_f64());
107 if let Some(image) = image {
108 outcome = outcome.with_images(vec![image]);
109 }
110 outcome
111 }
112}