1use std::collections::VecDeque;
23use std::path::PathBuf;
24use std::sync::Mutex;
25use std::sync::atomic::{AtomicU64, Ordering};
26
27use anyhow::{Context, Result};
28use base64::{Engine as _, engine::general_purpose};
29use tokio::process::Command;
30use tokio_util::sync::CancellationToken;
31
32use crate::constants::{SCREENSHOT_MAX_WIDTH, SCREENSHOT_REGISTRY_CAPACITY};
33
34use super::Backend;
35
/// Coordinate-mapping record stored for one registered screenshot.
///
/// `ComputerUseDriver::scale_coords` multiplies a screenshot-space coordinate
/// by `scale_factor` and then adds the matching offset.
#[derive(Clone, Debug)]
pub struct ScreenshotMetadata {
    /// Identifier assigned when the screenshot was registered.
    pub id: u64,
    /// Multiplier applied to screenshot coordinates before the offset is added.
    pub scale_factor: f64,
    /// Value added to the scaled x coordinate.
    pub offset_x: i32,
    /// Value added to the scaled y coordinate.
    pub offset_y: i32,
    /// Free-form label describing the capture (for example `"fullscreen"`).
    pub kind: String,
}
49
50#[derive(Debug, Default)]
56pub struct ScreenshotRegistry {
57 entries: VecDeque<ScreenshotMetadata>,
58}
59
60impl ScreenshotRegistry {
61 pub fn new() -> Self {
62 Self {
63 entries: VecDeque::new(),
64 }
65 }
66
67 pub fn push(&mut self, meta: ScreenshotMetadata) {
68 if self.entries.len() >= SCREENSHOT_REGISTRY_CAPACITY {
69 self.entries.pop_front();
70 }
71 self.entries.push_back(meta);
72 }
73
74 pub fn get(&self, id: u64) -> Option<&ScreenshotMetadata> {
75 self.entries.iter().find(|m| m.id == id)
76 }
77
78 pub fn latest(&self) -> Option<&ScreenshotMetadata> {
79 self.entries.back()
80 }
81
82 pub fn len(&self) -> usize {
83 self.entries.len()
84 }
85
86 pub fn is_empty(&self) -> bool {
87 self.entries.is_empty()
88 }
89}
90
/// Selects which part of the display a capture should cover.
#[derive(Clone, Debug)]
pub enum ScreenshotSpec {
    /// The whole screen.
    Fullscreen,
    /// The currently focused window.
    Focused,
    /// A single monitor/output, selected by name.
    Monitor(String),
    /// A rectangle given as `(x, y, width, height)`.
    Region(i32, i32, u32, u32),
    /// A window selected by title.
    Window(String),
}
102
/// Everything `ComputerUseDriver::capture` returns for one screenshot.
#[derive(Debug)]
pub struct CaptureResult {
    /// Id under which the screenshot's metadata was registered.
    pub id: u64,
    /// `raw_bytes` encoded with the standard base64 alphabet.
    pub base64_png: String,
    /// Bytes of the captured image file as read from disk (after any downscaling).
    pub raw_bytes: Vec<u8>,
    /// Width read from the PNG header of `raw_bytes`; `0` when it could not be read.
    pub width: u32,
    /// Height read from the PNG header of `raw_bytes`; `0` when it could not be read.
    pub height: u32,
    /// Factor that maps image coordinates back to capture coordinates
    /// (`1.0` when the image was not downscaled).
    pub scale_factor: f64,
    /// X offset reported by the capture dispatch for this screenshot.
    pub offset_x: i32,
    /// Y offset reported by the capture dispatch for this screenshot.
    pub offset_y: i32,
    /// One-line human-readable description (id, kind, size, scale, offset).
    pub summary: String,
}
117
118pub struct ComputerUseDriver {
120 backend: Backend,
121 registry: Mutex<ScreenshotRegistry>,
122 file_counter: AtomicU64,
126 id_counter: AtomicU64,
129}
130
131impl ComputerUseDriver {
132 pub fn new(backend: Backend) -> Self {
133 Self {
134 backend,
135 registry: Mutex::new(ScreenshotRegistry::new()),
136 file_counter: AtomicU64::new(0),
137 id_counter: AtomicU64::new(0),
138 }
139 }
140
141 pub fn backend(&self) -> Backend {
142 self.backend
143 }
144
145 pub fn ensure_alive(&self) -> Result<(), String> {
150 if super::display_is_reachable(self.backend) {
151 Ok(())
152 } else {
153 Err(format!(
154 "Display unreachable (backend={:?}). Was the session \
155 detached, or did `DISPLAY` change?",
156 self.backend
157 ))
158 }
159 }
160
161 pub fn scale_coords(
164 &self,
165 x: i32,
166 y: i32,
167 screenshot_id: Option<u64>,
168 ) -> Result<(i32, i32), String> {
169 let reg = self.registry.lock().map_err(|e| e.to_string())?;
170 let meta = match screenshot_id {
171 Some(id) => reg.get(id).cloned().ok_or_else(|| {
172 format!(
173 "Screenshot id {} not found in registry (likely evicted — capacity {}). \
174 Take a fresh screenshot and retry with the new id.",
175 id, SCREENSHOT_REGISTRY_CAPACITY
176 )
177 })?,
178 None => reg.latest().cloned().ok_or_else(|| {
179 "No screenshots registered yet — call `screenshot` before \
180 `click` / `mouse_move`."
181 .to_string()
182 })?,
183 };
184 Ok((
185 (x as f64 * meta.scale_factor) as i32 + meta.offset_x,
186 (y as f64 * meta.scale_factor) as i32 + meta.offset_y,
187 ))
188 }
189
190 pub fn register_screenshot(
192 &self,
193 scale_factor: f64,
194 offset_x: i32,
195 offset_y: i32,
196 kind: String,
197 ) -> u64 {
198 let id = self.id_counter.fetch_add(1, Ordering::Relaxed);
199 if let Ok(mut reg) = self.registry.lock() {
200 reg.push(ScreenshotMetadata {
201 id,
202 scale_factor,
203 offset_x,
204 offset_y,
205 kind,
206 });
207 }
208 id
209 }
210
211 pub async fn capture(
214 &self,
215 spec: ScreenshotSpec,
216 token: &CancellationToken,
217 ) -> Result<CaptureResult> {
218 self.ensure_alive()
219 .map_err(|error| anyhow::anyhow!(error))?;
220
221 let seq = self.file_counter.fetch_add(1, Ordering::Relaxed);
222 let temp_path = std::env::temp_dir().join(format!("mermaid-screenshot-{}.png", seq));
223 let temp_str = temp_path.to_string_lossy().to_string();
224 let _guard = TempFileGuard(temp_path.clone());
225
226 let (offset_x, offset_y, kind) =
227 dispatch_capture(self.backend, &spec, &temp_str, token).await?;
228
229 let scale_factor = downscale_if_needed(&temp_str, SCREENSHOT_MAX_WIDTH).await?;
230 let id = self.register_screenshot(scale_factor, offset_x, offset_y, kind.clone());
231
232 let raw_bytes = tokio::fs::read(&temp_path)
233 .await
234 .context("reading captured screenshot")?;
235 let width = read_png_width(&raw_bytes).unwrap_or(0);
236 let height = read_png_height(&raw_bytes).unwrap_or(0);
237 let base64_png = general_purpose::STANDARD.encode(&raw_bytes);
238
239 let offset_info = if offset_x != 0 || offset_y != 0 {
240 format!(", offset: +{}+{}", offset_x, offset_y)
241 } else {
242 String::new()
243 };
244 let summary = format!(
245 "Screenshot captured (id: {}, {}, {}x{}, scale: {:.2}x{})",
246 id, kind, width, height, scale_factor, offset_info
247 );
248
249 Ok(CaptureResult {
250 id,
251 base64_png,
252 raw_bytes,
253 width,
254 height,
255 scale_factor,
256 offset_x,
257 offset_y,
258 summary,
259 })
260 }
261
262 pub async fn capture_focused_for_autoshot(
268 &self,
269 token: &CancellationToken,
270 ) -> Option<(String, String)> {
271 let cap = self.capture(ScreenshotSpec::Focused, token).await.ok()?;
272 Some((cap.summary, cap.base64_png))
273 }
274
275 pub async fn check_cursor_landed(&self, sx: i32, sy: i32) -> Option<String> {
282 if !matches!(self.backend, Backend::X11) {
283 return None;
284 }
285 let out = run_cmd_stdout(Command::new("xdotool").arg("getmouselocation"))
286 .await
287 .ok()?;
288 let mut actual_x: Option<i32> = None;
289 let mut actual_y: Option<i32> = None;
290 for tok in out.split_whitespace() {
291 if let Some(v) = tok.strip_prefix("X:") {
292 actual_x = v.parse().ok();
293 } else if let Some(v) = tok.strip_prefix("Y:") {
294 actual_y = v.parse().ok();
295 }
296 }
297 let (ax, ay) = (actual_x?, actual_y?);
298 if (ax - sx).abs() > CURSOR_LANDED_TOLERANCE_PX
299 || (ay - sy).abs() > CURSOR_LANDED_TOLERANCE_PX
300 {
301 Some(format!(
302 "WARNING: cursor at ({}, {}), expected ({}, {}). Window may have moved \
303 or focus changed before the click landed.",
304 ax, ay, sx, sy
305 ))
306 } else {
307 None
308 }
309 }
310}
311
/// Largest per-axis distance, in pixels, between the expected and the reported
/// cursor position that `check_cursor_landed` accepts without a warning.
const CURSOR_LANDED_TOLERANCE_PX: i32 = 5;
315
316impl ComputerUseDriver {
319 pub async fn click(
322 &self,
323 sx: i32,
324 sy: i32,
325 button: &str,
326 token: &CancellationToken,
327 ) -> Result<()> {
328 let code = match button {
329 "middle" => "2",
330 "right" => "3",
331 _ => "1",
332 };
333 match self.backend {
334 Backend::X11 => {
335 run_cmd_cancellable(
336 Command::new("xdotool").args([
337 "mousemove",
338 "--sync",
339 &sx.to_string(),
340 &sy.to_string(),
341 "click",
342 "--clearmodifiers",
343 code,
344 ]),
345 token,
346 )
347 .await
348 },
349 Backend::Wayland => {
350 if !super::has_command("ydotool") {
351 anyhow::bail!("ydotool required for Wayland mouse control")
352 }
353 run_cmd_cancellable(
354 Command::new("ydotool").args([
355 "mousemove",
356 "--absolute",
357 "-x",
358 &sx.to_string(),
359 "-y",
360 &sy.to_string(),
361 ]),
362 token,
363 )
364 .await?;
365 run_cmd_cancellable(
366 Command::new("ydotool").args(["click", &format!("0x{}", code)]),
367 token,
368 )
369 .await
370 },
371 _ => anyhow::bail!("click not supported on this platform"),
372 }
373 }
374
375 pub async fn type_text(&self, text: &str, token: &CancellationToken) -> Result<()> {
379 let delay = crate::constants::TYPE_KEY_DELAY_MS.to_string();
380 match self.backend {
381 Backend::X11 => {
382 run_cmd_cancellable(
383 Command::new("xdotool").args([
384 "type",
385 "--clearmodifiers",
386 "--delay",
387 &delay,
388 text,
389 ]),
390 token,
391 )
392 .await
393 },
394 Backend::Wayland => {
395 if super::has_command("wtype") {
396 run_cmd_cancellable(Command::new("wtype").arg(text), token).await
397 } else if super::has_command("ydotool") {
398 run_cmd_cancellable(
399 Command::new("ydotool").args(["type", "--delay", &delay, text]),
400 token,
401 )
402 .await
403 } else {
404 anyhow::bail!("wtype or ydotool required for Wayland text input")
405 }
406 },
407 _ => anyhow::bail!("type_text not supported on this platform"),
408 }
409 }
410
411 pub async fn press_key(&self, key: &str, token: &CancellationToken) -> Result<()> {
413 match self.backend {
414 Backend::X11 => {
415 run_cmd_cancellable(Command::new("xdotool").args(["key", key]), token).await
416 },
417 Backend::Wayland => {
418 if super::has_command("wtype") {
419 let parts: Vec<&str> = key.split('+').collect();
421 let mut args: Vec<String> = Vec::new();
422 for (i, part) in parts.iter().enumerate() {
423 if i < parts.len() - 1 {
424 args.push("-M".to_string());
425 args.push(part.to_string());
426 } else {
427 args.push("-k".to_string());
428 args.push(part.to_string());
429 }
430 }
431 for part in parts.iter().take(parts.len().saturating_sub(1)) {
432 args.push("-m".to_string());
433 args.push(part.to_string());
434 }
435 run_cmd_cancellable(Command::new("wtype").args(&args), token).await
436 } else if super::has_command("ydotool") {
437 run_cmd_cancellable(Command::new("ydotool").args(["key", key]), token).await
438 } else {
439 anyhow::bail!("wtype or ydotool required for Wayland key input")
440 }
441 },
442 _ => anyhow::bail!("press_key not supported on this platform"),
443 }
444 }
445
446 pub async fn scroll(
448 &self,
449 direction: &str,
450 amount: i32,
451 token: &CancellationToken,
452 ) -> Result<()> {
453 match self.backend {
454 Backend::X11 => {
455 let button = if direction == "up" { "4" } else { "5" };
457 let mut args: Vec<String> = Vec::new();
458 for _ in 0..amount {
459 args.push("click".to_string());
460 args.push(button.to_string());
461 }
462 run_cmd_cancellable(Command::new("xdotool").args(&args), token).await
463 },
464 Backend::Wayland => {
465 if !super::has_command("ydotool") {
466 anyhow::bail!("ydotool required for Wayland scroll")
467 }
468 let wheel_amount = if direction == "up" { -amount } else { amount };
469 run_cmd_cancellable(
470 Command::new("ydotool").args([
471 "mousemove",
472 "--wheel",
473 &wheel_amount.to_string(),
474 ]),
475 token,
476 )
477 .await
478 },
479 _ => anyhow::bail!("scroll not supported on this platform"),
480 }
481 }
482
483 pub async fn mouse_move(&self, sx: i32, sy: i32, token: &CancellationToken) -> Result<()> {
485 match self.backend {
486 Backend::X11 => {
487 run_cmd_cancellable(
488 Command::new("xdotool").args([
489 "mousemove",
490 "--sync",
491 &sx.to_string(),
492 &sy.to_string(),
493 ]),
494 token,
495 )
496 .await
497 },
498 Backend::Wayland => {
499 if !super::has_command("ydotool") {
500 anyhow::bail!("ydotool required for Wayland mouse control")
501 }
502 run_cmd_cancellable(
503 Command::new("ydotool").args([
504 "mousemove",
505 "--absolute",
506 "-x",
507 &sx.to_string(),
508 "-y",
509 &sy.to_string(),
510 ]),
511 token,
512 )
513 .await
514 },
515 _ => anyhow::bail!("mouse_move not supported on this platform"),
516 }
517 }
518
519 pub async fn list_windows(&self, _token: &CancellationToken) -> Result<Vec<String>> {
522 if !matches!(self.backend, Backend::X11) {
523 anyhow::bail!(
524 "list_windows requires X11. Wayland has no portable window-enumeration \
525 primitive. Run mermaid from an X11 session."
526 );
527 }
528 let wids =
529 run_cmd_stdout(Command::new("xdotool").args(["search", "--onlyvisible", "--name", ""]))
530 .await?;
531 let mut windows = Vec::new();
532 for wid in wids.lines() {
533 let wid = wid.trim();
534 if wid.is_empty() {
535 continue;
536 }
537 if let Ok(name) =
538 run_cmd_stdout(Command::new("xdotool").args(["getwindowname", wid])).await
539 {
540 let name = name.trim().to_string();
541 if !name.is_empty() && !windows.contains(&name) {
542 windows.push(name);
543 }
544 }
545 }
546 Ok(windows)
547 }
548}
549
/// Removes the wrapped path from the filesystem when dropped.
struct TempFileGuard(PathBuf);

impl Drop for TempFileGuard {
    /// Best-effort cleanup: a failed removal (for instance because the file
    /// was never created) is ignored.
    fn drop(&mut self) {
        let Self(path) = self;
        let _ignored = std::fs::remove_file(&*path);
    }
}
559
560async fn dispatch_capture(
563 backend: Backend,
564 spec: &ScreenshotSpec,
565 out_path: &str,
566 token: &CancellationToken,
567) -> Result<(i32, i32, String)> {
568 match (backend, spec) {
572 (Backend::X11, ScreenshotSpec::Fullscreen) => {
573 run_cmd_cancellable(Command::new("scrot").args(["-o", out_path]), token).await?;
574 Ok((0, 0, "fullscreen".to_string()))
575 },
576 (Backend::Wayland, ScreenshotSpec::Fullscreen) => {
577 run_cmd_cancellable(Command::new("grim").arg(out_path), token).await?;
578 Ok((0, 0, "fullscreen".to_string()))
579 },
580 (Backend::MacOS, ScreenshotSpec::Fullscreen) => {
581 run_cmd_cancellable(Command::new("screencapture").args(["-x", out_path]), token)
582 .await?;
583 Ok((0, 0, "fullscreen".to_string()))
584 },
585 (Backend::X11, ScreenshotSpec::Focused) => {
586 let (wx, wy) = get_focused_window_geometry_x11()
587 .await
588 .map(|(x, y, _, _)| (x, y))
589 .unwrap_or((0, 0));
590 run_cmd_cancellable(Command::new("scrot").args(["-u", "-o", out_path]), token).await?;
591 Ok((wx, wy, "focused window".to_string()))
592 },
593 (Backend::Wayland, ScreenshotSpec::Focused) => anyhow::bail!(
594 "Mode 'focused' not supported on Wayland (grim has no focused-window \
595 primitive). Use mode: 'fullscreen' or mode: 'monitor' with a specific \
596 output name."
597 ),
598 (Backend::MacOS, ScreenshotSpec::Focused) => {
599 run_cmd_cancellable(
600 Command::new("screencapture").args(["-x", "-W", out_path]),
601 token,
602 )
603 .await?;
604 Ok((0, 0, "focused window".to_string()))
605 },
606 (Backend::X11, ScreenshotSpec::Region(x, y, w, h)) => {
607 run_cmd_cancellable(
608 Command::new("scrot").args([
609 "-a",
610 &format!("{},{},{},{}", x, y, w, h),
611 "-o",
612 out_path,
613 ]),
614 token,
615 )
616 .await?;
617 Ok((*x, *y, format!("region {}x{}+{}+{}", w, h, x, y)))
618 },
619 (Backend::Wayland, ScreenshotSpec::Region(x, y, w, h)) => {
620 run_cmd_cancellable(
621 Command::new("grim").args(["-g", &format!("{},{} {}x{}", x, y, w, h), out_path]),
622 token,
623 )
624 .await?;
625 Ok((*x, *y, format!("region {}x{}+{}+{}", w, h, x, y)))
626 },
627 (Backend::X11, ScreenshotSpec::Monitor(name)) => {
628 let (mx, my, mw, mh) = parse_monitor_geometry_x11(name).await.ok_or_else(|| {
629 anyhow::anyhow!(
630 "Monitor '{}' not found. Run `xrandr --query` to list outputs.",
631 name
632 )
633 })?;
634 run_cmd_cancellable(
635 Command::new("scrot").args([
636 "-a",
637 &format!("{},{},{},{}", mx, my, mw, mh),
638 "-o",
639 out_path,
640 ]),
641 token,
642 )
643 .await?;
644 Ok((mx, my, format!("monitor {}", name)))
645 },
646 (Backend::Wayland, ScreenshotSpec::Monitor(name)) => {
647 run_cmd_cancellable(Command::new("grim").args(["-o", name, out_path]), token).await?;
648 Ok((0, 0, format!("monitor {}", name)))
649 },
650 (Backend::X11, ScreenshotSpec::Window(title)) => {
651 let wid = run_cmd_stdout(Command::new("xdotool").args(["search", "--name", title]))
654 .await?
655 .lines()
656 .next()
657 .map(str::trim)
658 .filter(|s| !s.is_empty())
659 .map(str::to_string)
660 .ok_or_else(|| {
661 anyhow::anyhow!(
662 "No window found matching '{}'. Use list_windows to see available \
663 windows.",
664 title
665 )
666 })?;
667 run_cmd_cancellable(
668 Command::new("xdotool").args(["windowactivate", "--sync", &wid]),
669 token,
670 )
671 .await?;
672 tokio::time::sleep(std::time::Duration::from_millis(
673 crate::constants::WINDOW_FOCUS_DELAY_MS,
674 ))
675 .await;
676 let (wx, wy) = get_window_geometry_x11(&wid)
677 .await
678 .map(|(x, y, _, _)| (x, y))
679 .unwrap_or((0, 0));
680 run_cmd_cancellable(Command::new("scrot").args(["-u", "-o", out_path]), token).await?;
681 Ok((wx, wy, format!("window \"{}\"", title)))
682 },
683 (Backend::Wayland, ScreenshotSpec::Window(_)) => anyhow::bail!(
684 "Mode 'window' not supported on Wayland (grim has no window-by-name capture). \
685 Use mode: 'fullscreen' or mode: 'monitor' with a specific output name."
686 ),
687 (Backend::MacOS, _) => anyhow::bail!(
688 "This screenshot mode is not yet ported to macOS. Use mode: 'fullscreen' for now."
689 ),
690 (Backend::Windows, _) | (Backend::Unsupported, _) => {
691 anyhow::bail!("Unsupported platform for computer-use capture")
692 },
693 }
694}
695
696pub(crate) async fn run_cmd_cancellable(
700 cmd: &mut Command,
701 token: &CancellationToken,
702) -> Result<()> {
703 cmd.kill_on_drop(true);
704 tokio::select! {
705 biased;
706 _ = token.cancelled() => anyhow::bail!("cancelled"),
707 res = cmd.output() => {
708 let out = res.context("subprocess spawn")?;
709 if !out.status.success() {
710 anyhow::bail!(
711 "subprocess failed: {}",
712 String::from_utf8_lossy(&out.stderr).trim()
713 );
714 }
715 Ok(())
716 }
717 }
718}
719
720async fn run_cmd_stdout(cmd: &mut Command) -> Result<String> {
721 let out = cmd.output().await.context("subprocess spawn")?;
722 if !out.status.success() {
723 anyhow::bail!(
724 "subprocess failed: {}",
725 String::from_utf8_lossy(&out.stderr).trim()
726 );
727 }
728 Ok(String::from_utf8_lossy(&out.stdout).to_string())
729}
730
731async fn get_focused_window_geometry_x11() -> Option<(i32, i32, u32, u32)> {
734 let wid = run_cmd_stdout(Command::new("xdotool").arg("getactivewindow"))
735 .await
736 .ok()?;
737 let wid = wid.trim();
738 if wid.is_empty() {
739 return None;
740 }
741 get_window_geometry_x11(wid).await
742}
743
744async fn get_window_geometry_x11(wid: &str) -> Option<(i32, i32, u32, u32)> {
745 let out = run_cmd_stdout(Command::new("xdotool").args(["getwindowgeometry", "--shell", wid]))
746 .await
747 .ok()?;
748 let mut x = None;
749 let mut y = None;
750 let mut width = None;
751 let mut height = None;
752 for line in out.lines() {
753 if let Some(v) = line.strip_prefix("X=") {
754 x = v.parse().ok();
755 } else if let Some(v) = line.strip_prefix("Y=") {
756 y = v.parse().ok();
757 } else if let Some(v) = line.strip_prefix("WIDTH=") {
758 width = v.parse().ok();
759 } else if let Some(v) = line.strip_prefix("HEIGHT=") {
760 height = v.parse().ok();
761 }
762 }
763 Some((x?, y?, width?, height?))
764}
765
766async fn parse_monitor_geometry_x11(name: &str) -> Option<(i32, i32, u32, u32)> {
767 let out = run_cmd_stdout(Command::new("xrandr").arg("--query"))
768 .await
769 .ok()?;
770 for line in out.lines() {
771 if !line.contains(" connected") {
772 continue;
773 }
774 let parts: Vec<&str> = line.split_whitespace().collect();
775 if parts.first() != Some(&name) {
776 continue;
777 }
778 for part in &parts[2..] {
779 if let Some((res, offsets)) = part.split_once('+')
780 && let Some((w, h)) = res.split_once('x')
781 {
782 let width = w.parse::<u32>().ok()?;
783 let height = h.parse::<u32>().ok()?;
784 let mut off = offsets.splitn(2, '+');
785 let x = off.next()?.parse::<i32>().ok()?;
786 let y = off.next()?.parse::<i32>().ok()?;
787 return Some((x, y, width, height));
788 }
789 }
790 }
791 None
792}
793
/// Reads the big-endian width stored at bytes 16..20 of a PNG file.
///
/// Returns `None` unless `bytes` is longer than 24 bytes and starts with the
/// 8-byte PNG signature.
fn read_png_width(bytes: &[u8]) -> Option<u32> {
    const PNG_SIGNATURE: &[u8] = b"\x89PNG\r\n\x1a\n";
    if bytes.len() <= 24 || !bytes.starts_with(PNG_SIGNATURE) {
        return None;
    }
    let field: [u8; 4] = bytes[16..20].try_into().ok()?;
    Some(u32::from_be_bytes(field))
}
805
/// Reads the big-endian height stored at bytes 20..24 of a PNG file.
///
/// Returns `None` unless `bytes` is longer than 28 bytes and starts with the
/// 8-byte PNG signature.
fn read_png_height(bytes: &[u8]) -> Option<u32> {
    const PNG_SIGNATURE: &[u8] = b"\x89PNG\r\n\x1a\n";
    if bytes.len() <= 28 || !bytes.starts_with(PNG_SIGNATURE) {
        return None;
    }
    let field: [u8; 4] = bytes[20..24].try_into().ok()?;
    Some(u32::from_be_bytes(field))
}
815
816async fn downscale_if_needed(path: &str, max_width: u32) -> Result<f64> {
821 let bytes = tokio::fs::read(path).await?;
822 let original_width = read_png_width(&bytes).unwrap_or(1920);
823 if original_width <= max_width {
824 return Ok(1.0);
825 }
826 let scale_factor = original_width as f64 / max_width as f64;
827 let scaled = format!("{}.scaled.png", path);
828
829 let convert = Command::new("convert")
830 .args([path, "-resize", &format!("{}x", max_width), &scaled])
831 .output()
832 .await;
833 if let Ok(o) = convert
834 && o.status.success()
835 {
836 tokio::fs::rename(&scaled, path).await?;
837 return Ok(scale_factor);
838 }
839
840 let ffmpeg = Command::new("ffmpeg")
841 .args([
842 "-y",
843 "-i",
844 path,
845 "-vf",
846 &format!("scale={}:-1", max_width),
847 &scaled,
848 ])
849 .output()
850 .await;
851 if let Ok(o) = ffmpeg
852 && o.status.success()
853 {
854 tokio::fs::rename(&scaled, path).await?;
855 return Ok(scale_factor);
856 }
857
858 let _ = tokio::fs::remove_file(&scaled).await;
859 tracing::warn!(
860 original_width,
861 "neither ImageMagick nor ffmpeg available; sending full-resolution screenshot"
862 );
863 Ok(1.0)
864}
865
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds identity-mapped fullscreen metadata with the given id.
    fn fullscreen_meta(id: u64) -> ScreenshotMetadata {
        ScreenshotMetadata {
            id,
            scale_factor: 1.0,
            offset_x: 0,
            offset_y: 0,
            kind: "fullscreen".to_string(),
        }
    }

    /// Pushing three entries past capacity evicts exactly the three oldest.
    #[test]
    fn registry_lru_evicts_oldest_past_capacity() {
        let overflow: u64 = 3;
        let total = SCREENSHOT_REGISTRY_CAPACITY as u64 + overflow;
        let mut registry = ScreenshotRegistry::new();
        (0..total).for_each(|id| registry.push(fullscreen_meta(id)));

        assert_eq!(registry.len(), SCREENSHOT_REGISTRY_CAPACITY);
        for evicted in 0..overflow {
            assert!(registry.get(evicted).is_none());
        }
        assert_eq!(registry.latest().unwrap().id, total - 1);
    }

    /// Coordinates are multiplied by the scale factor and then offset.
    #[test]
    fn scale_coords_applies_scale_and_offset() {
        let driver = ComputerUseDriver::new(Backend::X11);
        let id = driver.register_screenshot(2.0, 100, 50, "fullscreen".to_string());
        let (sx, sy) = driver.scale_coords(10, 20, Some(id)).unwrap();
        assert_eq!(sx, 100 + 20);
        assert_eq!(sy, 50 + 40);
    }

    /// Asking for an id that was pushed out of the registry yields the eviction hint.
    #[test]
    fn scale_coords_errors_on_evicted_id() {
        let driver = ComputerUseDriver::new(Backend::X11);
        for _ in 0..(SCREENSHOT_REGISTRY_CAPACITY + 1) {
            driver.register_screenshot(1.0, 0, 0, "fullscreen".to_string());
        }
        let err = driver.scale_coords(0, 0, Some(0)).unwrap_err();
        assert!(
            err.contains("evicted"),
            "expected eviction message, got: {}",
            err
        );
    }

    /// Without any registered screenshot the "latest" lookup fails with a hint.
    #[test]
    fn scale_coords_errors_with_no_screenshots_yet() {
        let driver = ComputerUseDriver::new(Backend::X11);
        let err = driver.scale_coords(10, 20, None).unwrap_err();
        assert!(err.contains("No screenshots"));
    }

    /// An unsupported backend is never reported as alive.
    #[test]
    fn ensure_alive_fails_on_unsupported_backend() {
        let driver = ComputerUseDriver::new(Backend::Unsupported);
        assert!(driver.ensure_alive().is_err());
    }
}