Skip to main content

mermaid_cli/providers/tool/computer_use/
scroll.rs

1//! `scroll` — wheel scroll in "up" or "down" direction. Amount is
2//! clamped to `[1, MAX_SCROLL_AMOUNT]` so a runaway model can't
3//! request a million ticks (would blow ARG_MAX building xdotool's
4//! argv on the X11 path).
5
6use std::sync::Arc;
7use std::time::Instant;
8
9use async_trait::async_trait;
10use serde_json::Value;
11
12use crate::constants::MAX_SCROLL_AMOUNT;
13use crate::domain::{ToolDefinition, ToolOutcome};
14use crate::providers::ctx::ExecContext;
15
16use super::super::ToolExecutor;
17use super::computer_use_success;
18use super::driver::ComputerUseDriver;
19
20pub struct ScrollTool {
21    driver: Arc<ComputerUseDriver>,
22}
23
24impl ScrollTool {
25    pub fn new(driver: Arc<ComputerUseDriver>) -> Self {
26        Self { driver }
27    }
28}
29
30#[async_trait]
31impl ToolExecutor for ScrollTool {
32    fn name(&self) -> &'static str {
33        "scroll"
34    }
35
36    fn schema(&self) -> ToolDefinition {
37        ToolDefinition {
38            name: "scroll".to_string(),
39            description: "Scroll the focused window. `direction` is 'up' or 'down'; `amount` is \
40                 clamped to 1..=100 wheel ticks. Scrolls in the currently-focused window; \
41                 click on the scroll target first if it isn't focused."
42                .to_string(),
43            input_schema: serde_json::json!({
44                "type": "object",
45                "properties": {
46                    "direction": { "type": "string", "enum": ["up", "down"] },
47                    "amount": { "type": "integer", "minimum": 1, "maximum": MAX_SCROLL_AMOUNT }
48                },
49                "required": ["direction", "amount"]
50            }),
51        }
52    }
53
54    async fn execute(&self, args: Value, ctx: ExecContext) -> ToolOutcome {
55        let started = Instant::now();
56        if let Err(error) = self.driver.ensure_alive() {
57            return ToolOutcome::error(error, started.elapsed().as_secs_f64());
58        }
59
60        let direction = args
61            .get("direction")
62            .and_then(|v| v.as_str())
63            .unwrap_or("down")
64            .to_string();
65        if direction != "up" && direction != "down" {
66            return ToolOutcome::error(
67                format!(
68                    "scroll: direction must be 'up' or 'down', got '{}'",
69                    direction
70                ),
71                started.elapsed().as_secs_f64(),
72            );
73        }
74        let requested = args
75            .get("amount")
76            .and_then(|v| v.as_i64())
77            .map(|n| n as i32)
78            .unwrap_or(3);
79        let amount = requested.clamp(1, MAX_SCROLL_AMOUNT);
80
81        let res = tokio::select! {
82            biased;
83            _ = ctx.token.cancelled() => return ToolOutcome::cancelled(),
84            r = self.driver.scroll(&direction, amount, &ctx.token) => r,
85        };
86        if let Err(e) = res {
87            return ToolOutcome::error(
88                format!("scroll failed: {}", e),
89                started.elapsed().as_secs_f64(),
90            );
91        }
92
93        let clamp_note = if requested != amount {
94            format!(" (clamped from {} to {})", requested, amount)
95        } else {
96            String::new()
97        };
98        computer_use_success(
99            "scroll",
100            args,
101            format!("Scrolled {} by {}{}", direction, amount, clamp_note),
102            started.elapsed().as_secs_f64(),
103        )
104    }
105}