Skip to main content

linux_backend/
lib.rs

1#![allow(clippy::result_large_err)]
2use std::path::{Path, PathBuf};
3use std::process::Stdio;
4
5use base64::Engine;
6use chrono::Utc;
7use desktop_core::{
8    ActionReceipt, ActionRequest, ArtifactRef, CursorPosition, MouseButton, Observation,
9    ScreenshotData, StructuredError, WindowMetadata,
10};
11use serde_json::{Value, json};
12use tokio::fs;
13use tokio::process::Command;
14
/// Configuration consumed by `LinuxBackend`.
#[derive(Clone, Debug)]
pub struct BackendOptions {
    /// X display identifier; exported as `DISPLAY` to spawned commands.
    pub display: String,
    /// Directory in which screenshot files are created.
    pub artifacts_dir: PathBuf,
    /// Command used to launch the browser and to probe the `browser_open`
    /// capability.
    pub browser_command: String,
    /// Extra `(name, value)` environment pairs exported to spawned tools
    /// after `DISPLAY`.
    pub session_env: Vec<(String, String)>,
}
22
23#[derive(Debug, Clone)]
24pub struct LinuxBackend {
25    options: BackendOptions,
26}
27
28impl LinuxBackend {
29    pub fn new(options: BackendOptions) -> Self {
30        Self { options }
31    }
32
33    pub fn display(&self) -> &str {
34        &self.options.display
35    }
36
37    pub fn artifacts_dir(&self) -> &Path {
38        &self.options.artifacts_dir
39    }
40
41    pub fn browser_command(&self) -> &str {
42        &self.options.browser_command
43    }
44
45    fn apply_display_env(&self, command: &mut Command) {
46        command.env("DISPLAY", &self.options.display);
47        for (key, value) in &self.options.session_env {
48            command.env(key, value);
49        }
50    }
51
52    pub fn capabilities(&self) -> Vec<String> {
53        let mut caps = vec![
54            "screenshot".to_string(),
55            "shell".to_string(),
56            "filesystem".to_string(),
57        ];
58        if Self::tool_exists("xdotool") {
59            caps.extend([
60                "mouse".to_string(),
61                "keyboard".to_string(),
62                "window_focus".to_string(),
63                "window_resize".to_string(),
64            ]);
65        }
66        if Self::tool_exists("xprop") {
67            caps.push("window_metadata".to_string());
68        }
69        if Self::tool_exists(&self.options.browser_command) {
70            caps.push("browser_open".to_string());
71        }
72        caps
73    }
74
75    pub async fn observation(&self) -> Result<Observation, StructuredError> {
76        let screenshot = self.capture_screenshot().await?;
77        let active_window = self.active_window().await.ok();
78        let cursor_position = self.cursor_position().await.ok();
79        let active_window_title = active_window
80            .as_ref()
81            .and_then(|window| window.title.clone());
82        Ok(Observation {
83            captured_at: Utc::now(),
84            capability_flags: self.capabilities(),
85            active_window,
86            cursor_position,
87            browser: None,
88            raw: json!({
89                "display": self.options.display,
90            }),
91            summary: json!({
92                "display": self.options.display,
93                "active_window": active_window_title,
94            }),
95            screenshot,
96        })
97    }
98
99    pub async fn screenshot_png(&self) -> Result<(Vec<u8>, PathBuf), StructuredError> {
100        let screenshot = self.capture_screenshot().await?;
101        let path = screenshot
102            .artifact_path
103            .clone()
104            .ok_or_else(|| self.io_error("screenshot artifact path missing".to_string()))?;
105        let bytes = fs::read(&path)
106            .await
107            .map_err(|error| self.io_error(error.to_string()))?;
108        Ok((bytes, PathBuf::from(path)))
109    }
110
111    pub async fn perform_action(&self, action: ActionRequest) -> ActionReceipt {
112        let started_at = Utc::now();
113        let action_name = action.action_name().to_string();
114        match self.perform_action_inner(action).await {
115            Ok((result, artifacts)) => {
116                ActionReceipt::success(&action_name, started_at, result, artifacts)
117            }
118            Err(error) => ActionReceipt::failure(&action_name, started_at, error),
119        }
120    }
121
122    async fn perform_action_inner(
123        &self,
124        action: ActionRequest,
125    ) -> Result<(Value, Vec<ArtifactRef>), StructuredError> {
126        match action {
127            ActionRequest::MouseMove { x, y, .. } => {
128                self.run_xdotool(["mousemove", &x.to_string(), &y.to_string()])
129                    .await?;
130                Ok((json!({"x": x, "y": y}), vec![]))
131            }
132            ActionRequest::MouseClick { button, x, y, .. } => {
133                if let (Some(x), Some(y)) = (x, y) {
134                    self.run_xdotool(["mousemove", &x.to_string(), &y.to_string()])
135                        .await?;
136                }
137                let button_number = match button.unwrap_or(MouseButton::Left) {
138                    MouseButton::Left => "1",
139                    MouseButton::Middle => "2",
140                    MouseButton::Right => "3",
141                };
142                self.run_xdotool(["click", button_number]).await?;
143                Ok((json!({"button": button_number}), vec![]))
144            }
145            ActionRequest::MouseDrag {
146                start_x,
147                start_y,
148                end_x,
149                end_y,
150                ..
151            } => {
152                self.run_xdotool(["mousemove", &start_x.to_string(), &start_y.to_string()])
153                    .await?;
154                self.run_xdotool(["mousedown", "1"]).await?;
155                self.run_xdotool(["mousemove", &end_x.to_string(), &end_y.to_string()])
156                    .await?;
157                self.run_xdotool(["mouseup", "1"]).await?;
158                Ok((
159                    json!({"start": [start_x, start_y], "end": [end_x, end_y]}),
160                    vec![],
161                ))
162            }
163            ActionRequest::KeyPress { key, .. } => {
164                self.run_xdotool(["key", &key]).await?;
165                Ok((json!({"key": key}), vec![]))
166            }
167            ActionRequest::TypeText { text, .. } => {
168                self.run_xdotool(["type", "--delay", "1", &text]).await?;
169                Ok((json!({"typed": text}), vec![]))
170            }
171            ActionRequest::Hotkey { keys, .. } => {
172                let joined = keys.join("+");
173                self.run_xdotool(["key", &joined]).await?;
174                Ok((json!({"keys": keys}), vec![]))
175            }
176            ActionRequest::Scroll {
177                delta_x: _,
178                delta_y,
179                ..
180            } => {
181                if delta_y == 0 {
182                    return Err(self.unsupported(
183                        "horizontal-only scroll is not supported by the xdotool fallback",
184                    ));
185                }
186                let button = if delta_y > 0 { "4" } else { "5" };
187                let clicks = (delta_y.abs().max(1) / 120) + 1;
188                for _ in 0..clicks {
189                    self.run_xdotool(["click", button]).await?;
190                }
191                Ok((
192                    json!({"delta_y": delta_y, "emulated_clicks": clicks}),
193                    vec![],
194                ))
195            }
196            ActionRequest::OpenApp { name, .. } => {
197                self.run_shell_background(&name).await?;
198                Ok((json!({"command": name}), vec![]))
199            }
200            ActionRequest::FocusWindow { window_id, .. } => {
201                self.run_xdotool(["windowactivate", &window_id]).await?;
202                Ok((json!({"window_id": window_id}), vec![]))
203            }
204            ActionRequest::ResizeWindow {
205                window_id, bounds, ..
206            } => {
207                self.run_xdotool([
208                    "windowsize",
209                    &window_id,
210                    &bounds.width.to_string(),
211                    &bounds.height.to_string(),
212                ])
213                .await?;
214                self.run_xdotool([
215                    "windowmove",
216                    &window_id,
217                    &bounds.x.to_string(),
218                    &bounds.y.to_string(),
219                ])
220                .await?;
221                Ok((json!({"window_id": window_id, "bounds": bounds}), vec![]))
222            }
223            ActionRequest::RunCommand {
224                command, cwd, env, ..
225            } => {
226                let mut cmd = Command::new("sh");
227                cmd.arg("-lc").arg(&command);
228                cmd.env("DISPLAY", &self.options.display);
229                if let Some(cwd) = cwd.as_ref() {
230                    cmd.current_dir(cwd);
231                }
232                if let Some(env_map) = env {
233                    for (key, value) in env_map {
234                        cmd.env(key, value);
235                    }
236                }
237                let output = cmd
238                    .output()
239                    .await
240                    .map_err(|error| self.io_error(error.to_string()))?;
241                Ok((
242                    json!({
243                        "stdout": String::from_utf8_lossy(&output.stdout),
244                        "stderr": String::from_utf8_lossy(&output.stderr),
245                        "exit_code": output.status.code(),
246                    }),
247                    vec![],
248                ))
249            }
250            ActionRequest::ReadFile { path, .. } => {
251                let contents = fs::read_to_string(&path)
252                    .await
253                    .map_err(|error| self.io_error(error.to_string()))?;
254                Ok((json!({"path": path, "contents": contents}), vec![]))
255            }
256            ActionRequest::WriteFile { path, contents, .. } => {
257                if let Some(parent) = Path::new(&path).parent() {
258                    fs::create_dir_all(parent)
259                        .await
260                        .map_err(|error| self.io_error(error.to_string()))?;
261                }
262                fs::write(&path, contents.as_bytes())
263                    .await
264                    .map_err(|error| self.io_error(error.to_string()))?;
265                Ok((
266                    json!({"path": path, "bytes_written": contents.len()}),
267                    vec![],
268                ))
269            }
270            ActionRequest::BrowserOpen { url, .. } => {
271                let escaped = url.replace('"', "\\\"").replace('\'', "'\\''");
272                self.run_shell_background(&format!(
273                    "{} '{}'",
274                    self.options.browser_command, escaped
275                ))
276                .await?;
277                Ok((json!({"url": url, "mode": "desktop_fallback"}), vec![]))
278            }
279            ActionRequest::BrowserGetDom { .. }
280            | ActionRequest::BrowserClick { .. }
281            | ActionRequest::BrowserType { .. }
282            | ActionRequest::BrowserScreenshot { .. } => Err(self.unsupported(
283                "browser-specialized actions are handled by the control-plane browser adapter",
284            )),
285        }
286    }
287
288    async fn capture_screenshot(&self) -> Result<ScreenshotData, StructuredError> {
289        self.ensure_tool("import")?;
290        fs::create_dir_all(&self.options.artifacts_dir)
291            .await
292            .map_err(|error| self.io_error(error.to_string()))?;
293        let screenshot_path = self
294            .options
295            .artifacts_dir
296            .join(format!("screenshot-{}.png", Utc::now().timestamp_millis()));
297        let mut command = Command::new("import");
298        command.args([
299            "-window",
300            "root",
301            screenshot_path.to_string_lossy().as_ref(),
302        ]);
303        self.apply_display_env(&mut command);
304        let output = command
305            .output()
306            .await
307            .map_err(|error| self.io_error(error.to_string()))?;
308        if !output.status.success() {
309            return Err(self.command_error(
310                "import",
311                String::from_utf8_lossy(&output.stderr).into_owned(),
312            ));
313        }
314        let data = fs::read(&screenshot_path)
315            .await
316            .map_err(|error| self.io_error(error.to_string()))?;
317        Ok(ScreenshotData {
318            mime_type: "image/png".to_string(),
319            data_base64: Some(base64::engine::general_purpose::STANDARD.encode(data)),
320            width: None,
321            height: None,
322            artifact_path: Some(screenshot_path.to_string_lossy().to_string()),
323        })
324    }
325
326    async fn active_window(&self) -> Result<WindowMetadata, StructuredError> {
327        self.ensure_tool("xdotool")?;
328        let id = self
329            .run_command_capture("xdotool", ["getactivewindow"])
330            .await?;
331        let title = self
332            .run_command_capture("xdotool", ["getactivewindow", "getwindowname"])
333            .await
334            .unwrap_or_default();
335        let class_name = if Self::tool_exists("xprop") {
336            self.run_command_capture("xprop", ["-id", id.trim(), "WM_CLASS"])
337                .await
338                .ok()
339        } else {
340            None
341        };
342        Ok(WindowMetadata {
343            id: Some(id.trim().to_string()),
344            title: Some(title.trim().to_string()).filter(|value| !value.is_empty()),
345            class_name: class_name.map(|value| value.trim().to_string()),
346        })
347    }
348
349    async fn cursor_position(&self) -> Result<CursorPosition, StructuredError> {
350        self.ensure_tool("xdotool")?;
351        let output = self
352            .run_command_capture("xdotool", ["getmouselocation", "--shell"])
353            .await?;
354        let mut x = 0;
355        let mut y = 0;
356        let mut screen = None;
357        for line in output.lines() {
358            if let Some(value) = line.strip_prefix("X=") {
359                x = value.parse().unwrap_or_default();
360            } else if let Some(value) = line.strip_prefix("Y=") {
361                y = value.parse().unwrap_or_default();
362            } else if let Some(value) = line.strip_prefix("SCREEN=") {
363                screen = Some(value.to_string());
364            }
365        }
366        Ok(CursorPosition { x, y, screen })
367    }
368
369    async fn run_xdotool<I, S>(&self, args: I) -> Result<(), StructuredError>
370    where
371        I: IntoIterator<Item = S>,
372        S: AsRef<str>,
373    {
374        self.ensure_tool("xdotool")?;
375        let rendered: Vec<String> = args
376            .into_iter()
377            .map(|value| value.as_ref().to_string())
378            .collect();
379        let mut command = Command::new("xdotool");
380        command.args(&rendered);
381        self.apply_display_env(&mut command);
382        let output = command
383            .output()
384            .await
385            .map_err(|error| self.io_error(error.to_string()))?;
386        if output.status.success() {
387            Ok(())
388        } else {
389            Err(self.command_error(
390                "xdotool",
391                String::from_utf8_lossy(&output.stderr).into_owned(),
392            ))
393        }
394    }
395
396    async fn run_shell_background(&self, command: &str) -> Result<(), StructuredError> {
397        let mut child = Command::new("sh");
398        child
399            .arg("-lc")
400            .arg(format!("{} >/dev/null 2>&1 &", command))
401            .stdout(Stdio::null())
402            .stderr(Stdio::null());
403        self.apply_display_env(&mut child);
404        child
405            .spawn()
406            .map_err(|error| self.io_error(error.to_string()))?;
407        Ok(())
408    }
409
410    async fn run_command_capture<I, S>(
411        &self,
412        binary: &str,
413        args: I,
414    ) -> Result<String, StructuredError>
415    where
416        I: IntoIterator<Item = S>,
417        S: AsRef<str>,
418    {
419        let rendered: Vec<String> = args
420            .into_iter()
421            .map(|value| value.as_ref().to_string())
422            .collect();
423        let mut command = Command::new(binary);
424        command.args(&rendered);
425        self.apply_display_env(&mut command);
426        let output = command
427            .output()
428            .await
429            .map_err(|error| self.io_error(error.to_string()))?;
430        if output.status.success() {
431            Ok(String::from_utf8_lossy(&output.stdout).into_owned())
432        } else {
433            Err(self.command_error(binary, String::from_utf8_lossy(&output.stderr).into_owned()))
434        }
435    }
436
437    fn ensure_tool(&self, tool: &str) -> Result<(), StructuredError> {
438        if Self::tool_exists(tool) {
439            Ok(())
440        } else {
441            Err(self.missing_tool(tool))
442        }
443    }
444
445    pub fn tool_exists(tool: &str) -> bool {
446        std::process::Command::new("sh")
447            .arg("-lc")
448            .arg(format!("command -v {} >/dev/null 2>&1", tool))
449            .status()
450            .map(|status| status.success())
451            .unwrap_or(false)
452    }
453
454    fn missing_tool(&self, tool: &str) -> StructuredError {
455        StructuredError {
456            code: "missing_tool".to_string(),
457            message: format!("Required system tool `{tool}` is not available in the sandbox."),
458            retryable: false,
459            category: "environment".to_string(),
460            details: json!({"tool": tool}),
461            artifact_refs: vec![],
462        }
463    }
464
465    fn command_error(&self, binary: &str, stderr: String) -> StructuredError {
466        StructuredError {
467            code: "command_failed".to_string(),
468            message: format!("Command `{binary}` failed."),
469            retryable: true,
470            category: "execution".to_string(),
471            details: json!({"binary": binary, "stderr": stderr}),
472            artifact_refs: vec![],
473        }
474    }
475
476    fn unsupported(&self, message: &str) -> StructuredError {
477        StructuredError {
478            code: "unsupported".to_string(),
479            message: message.to_string(),
480            retryable: false,
481            category: "unsupported".to_string(),
482            details: json!({}),
483            artifact_refs: vec![],
484        }
485    }
486
487    fn io_error(&self, message: String) -> StructuredError {
488        StructuredError {
489            code: "io_error".to_string(),
490            message,
491            retryable: false,
492            category: "io".to_string(),
493            details: json!({}),
494            artifact_refs: vec![],
495        }
496    }
497}
498
#[cfg(test)]
mod tests {
    use super::*;

    /// `shell` is one of the unconditional capabilities, so it must be
    /// reported whichever tools are installed.
    #[test]
    fn capabilities_are_non_empty() {
        let options = BackendOptions {
            display: String::from(":99"),
            artifacts_dir: PathBuf::from("artifacts/test"),
            browser_command: String::from("firefox"),
            session_env: Vec::new(),
        };
        let flags = LinuxBackend::new(options).capabilities();
        assert!(flags.iter().any(|flag| flag == "shell"));
    }
}