// Source: desktop_core/lib.rs

use std::collections::BTreeMap;
use std::fs;
use std::path::Path;

use chrono::{DateTime, Utc};
use schemars::{JsonSchema, schema_for};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use uuid::Uuid;

11#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
12#[serde(rename_all = "snake_case")]
13pub enum MouseButton {
14    Left,
15    Middle,
16    Right,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
20pub struct Bounds {
21    pub x: i32,
22    pub y: i32,
23    pub width: u32,
24    pub height: u32,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
28#[serde(tag = "kind", rename_all = "snake_case")]
29pub enum ActionRequest {
30    MouseMove {
31        x: i32,
32        y: i32,
33        #[serde(default)]
34        task_id: Option<String>,
35    },
36    MouseClick {
37        button: Option<MouseButton>,
38        x: Option<i32>,
39        y: Option<i32>,
40        #[serde(default)]
41        task_id: Option<String>,
42    },
43    MouseDrag {
44        start_x: i32,
45        start_y: i32,
46        end_x: i32,
47        end_y: i32,
48        #[serde(default)]
49        task_id: Option<String>,
50    },
51    KeyPress {
52        key: String,
53        #[serde(default)]
54        task_id: Option<String>,
55    },
56    TypeText {
57        text: String,
58        #[serde(default)]
59        task_id: Option<String>,
60    },
61    Hotkey {
62        keys: Vec<String>,
63        #[serde(default)]
64        task_id: Option<String>,
65    },
66    Scroll {
67        delta_x: i32,
68        delta_y: i32,
69        #[serde(default)]
70        task_id: Option<String>,
71    },
72    OpenApp {
73        name: String,
74        #[serde(default)]
75        task_id: Option<String>,
76    },
77    FocusWindow {
78        window_id: String,
79        #[serde(default)]
80        task_id: Option<String>,
81    },
82    ResizeWindow {
83        window_id: String,
84        bounds: Bounds,
85        #[serde(default)]
86        task_id: Option<String>,
87    },
88    RunCommand {
89        command: String,
90        cwd: Option<String>,
91        env: Option<BTreeMap<String, String>>,
92        #[serde(default)]
93        task_id: Option<String>,
94    },
95    ReadFile {
96        path: String,
97        #[serde(default)]
98        task_id: Option<String>,
99    },
100    WriteFile {
101        path: String,
102        contents: String,
103        #[serde(default)]
104        task_id: Option<String>,
105    },
106    BrowserOpen {
107        url: String,
108        #[serde(default)]
109        task_id: Option<String>,
110    },
111    BrowserGetDom {
112        #[serde(default)]
113        task_id: Option<String>,
114    },
115    BrowserClick {
116        selector: Option<String>,
117        x: Option<i32>,
118        y: Option<i32>,
119        button: Option<MouseButton>,
120        #[serde(default)]
121        task_id: Option<String>,
122    },
123    BrowserType {
124        selector: Option<String>,
125        text: String,
126        #[serde(default)]
127        task_id: Option<String>,
128    },
129    BrowserScreenshot {
130        #[serde(default)]
131        task_id: Option<String>,
132    },
133}
134
135impl ActionRequest {
136    pub fn action_name(&self) -> &'static str {
137        match self {
138            Self::MouseMove { .. } => "mouse_move",
139            Self::MouseClick { .. } => "mouse_click",
140            Self::MouseDrag { .. } => "mouse_drag",
141            Self::KeyPress { .. } => "key_press",
142            Self::TypeText { .. } => "type_text",
143            Self::Hotkey { .. } => "hotkey",
144            Self::Scroll { .. } => "scroll",
145            Self::OpenApp { .. } => "open_app",
146            Self::FocusWindow { .. } => "focus_window",
147            Self::ResizeWindow { .. } => "resize_window",
148            Self::RunCommand { .. } => "run_command",
149            Self::ReadFile { .. } => "read_file",
150            Self::WriteFile { .. } => "write_file",
151            Self::BrowserOpen { .. } => "browser_open",
152            Self::BrowserGetDom { .. } => "browser_get_dom",
153            Self::BrowserClick { .. } => "browser_click",
154            Self::BrowserType { .. } => "browser_type",
155            Self::BrowserScreenshot { .. } => "browser_screenshot",
156        }
157    }
158
159    pub fn task_id(&self) -> Option<&str> {
160        match self {
161            Self::MouseMove { task_id, .. }
162            | Self::MouseClick { task_id, .. }
163            | Self::MouseDrag { task_id, .. }
164            | Self::KeyPress { task_id, .. }
165            | Self::TypeText { task_id, .. }
166            | Self::Hotkey { task_id, .. }
167            | Self::Scroll { task_id, .. }
168            | Self::OpenApp { task_id, .. }
169            | Self::FocusWindow { task_id, .. }
170            | Self::ResizeWindow { task_id, .. }
171            | Self::RunCommand { task_id, .. }
172            | Self::ReadFile { task_id, .. }
173            | Self::WriteFile { task_id, .. }
174            | Self::BrowserOpen { task_id, .. }
175            | Self::BrowserGetDom { task_id }
176            | Self::BrowserClick { task_id, .. }
177            | Self::BrowserType { task_id, .. }
178            | Self::BrowserScreenshot { task_id } => task_id.as_deref(),
179        }
180    }
181}
182
183#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
184pub struct ArtifactRef {
185    pub kind: String,
186    pub path: String,
187    pub mime_type: Option<String>,
188}
189
190#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
191pub struct StructuredError {
192    pub code: String,
193    pub message: String,
194    pub retryable: bool,
195    pub category: String,
196    pub details: Value,
197    #[serde(default)]
198    pub artifact_refs: Vec<ArtifactRef>,
199}
200
201#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
202pub struct ActionReceipt {
203    pub status: String,
204    pub receipt_id: String,
205    pub action_type: String,
206    pub started_at: DateTime<Utc>,
207    pub completed_at: DateTime<Utc>,
208    pub result: Value,
209    #[serde(default)]
210    pub artifacts: Vec<ArtifactRef>,
211    pub error: Option<StructuredError>,
212}
213
214impl ActionReceipt {
215    pub fn success(
216        action_type: &str,
217        started_at: DateTime<Utc>,
218        result: Value,
219        artifacts: Vec<ArtifactRef>,
220    ) -> Self {
221        Self {
222            status: "ok".to_string(),
223            receipt_id: Uuid::new_v4().to_string(),
224            action_type: action_type.to_string(),
225            started_at,
226            completed_at: Utc::now(),
227            result,
228            artifacts,
229            error: None,
230        }
231    }
232
233    pub fn failure(action_type: &str, started_at: DateTime<Utc>, error: StructuredError) -> Self {
234        Self {
235            status: "error".to_string(),
236            receipt_id: Uuid::new_v4().to_string(),
237            action_type: action_type.to_string(),
238            started_at,
239            completed_at: Utc::now(),
240            result: json!({}),
241            artifacts: error.artifact_refs.clone(),
242            error: Some(error),
243        }
244    }
245}
246
247#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
248pub struct ScreenshotData {
249    pub mime_type: String,
250    #[serde(default)]
251    pub data_base64: Option<String>,
252    pub width: Option<u32>,
253    pub height: Option<u32>,
254    pub artifact_path: Option<String>,
255}
256
257#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
258pub struct WindowMetadata {
259    pub id: Option<String>,
260    pub title: Option<String>,
261    pub class_name: Option<String>,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
265pub struct CursorPosition {
266    pub x: i32,
267    pub y: i32,
268    pub screen: Option<String>,
269}
270
271#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
272pub struct BrowserSnapshot {
273    pub current_url: Option<String>,
274    pub title: Option<String>,
275    pub dom_html: Option<String>,
276    #[serde(default)]
277    pub console_logs: Vec<String>,
278    #[serde(default)]
279    pub network_events: Vec<String>,
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
283pub struct Observation {
284    pub captured_at: DateTime<Utc>,
285    pub screenshot: ScreenshotData,
286    pub active_window: Option<WindowMetadata>,
287    pub cursor_position: Option<CursorPosition>,
288    #[serde(default)]
289    pub capability_flags: Vec<String>,
290    pub browser: Option<BrowserSnapshot>,
291    pub raw: Value,
292    pub summary: Value,
293}
294
295#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
296pub struct ActionDescriptor {
297    pub name: String,
298    pub description: String,
299    pub category: String,
300    pub requires_approval: bool,
301}
302
303#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
304pub struct RuntimeCapabilities {
305    pub actions: Vec<ActionDescriptor>,
306    pub provider: String,
307    pub browser_mode: String,
308    pub vm_mode: String,
309    #[serde(default)]
310    pub enrichments: Vec<String>,
311}
312
313#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
314pub struct TaskStatus {
315    pub task_id: String,
316    pub state: String,
317    pub paused: bool,
318    pub approval_required: bool,
319    pub current_goal: Option<String>,
320}
321
322#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
323pub struct CreateSessionRequest {
324    #[serde(default = "default_provider")]
325    pub provider: String,
326    #[serde(default = "default_width")]
327    pub width: u32,
328    #[serde(default = "default_height")]
329    pub height: u32,
330    pub display: Option<String>,
331    pub browser_command: Option<String>,
332    pub boot: Option<String>,
333    pub container_image: Option<String>,
334    pub disable_kvm: Option<bool>,
335    pub qemu_profile: Option<String>,
336    pub shared_host_path: Option<String>,
337}
338
339#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
340pub struct SessionRecord {
341    pub id: String,
342    pub provider: String,
343    pub qemu_profile: Option<String>,
344    pub display: Option<String>,
345    pub width: u32,
346    pub height: u32,
347    pub state: String,
348    pub created_at: DateTime<Utc>,
349    pub artifacts_dir: String,
350    #[serde(default)]
351    pub capabilities: Vec<String>,
352    pub browser_command: Option<String>,
353    pub runtime_base_url: Option<String>,
354    pub viewer_url: Option<String>,
355    pub live_desktop_view: Option<LiveDesktopView>,
356    pub bridge_status: Option<String>,
357    pub readiness_state: Option<String>,
358    pub bridge_error: Option<StructuredError>,
359}
360
361#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
362pub struct LiveDesktopView {
363    pub mode: String,
364    pub status: String,
365    pub provider_surface: String,
366    pub matches_action_plane: bool,
367    pub canonical_url: Option<String>,
368    pub debug_url: Option<String>,
369    pub reason: Option<String>,
370    pub refresh_interval_ms: Option<u64>,
371}
372
/// Serde default for `CreateSessionRequest::provider`: `"qemu"`.
fn default_provider() -> String {
    String::from("qemu")
}
/// Serde default for `CreateSessionRequest::width`: `1440`.
fn default_width() -> u32 {
    1440
}
/// Serde default for `CreateSessionRequest::height`: `900`.
fn default_height() -> u32 {
    900
}

383pub fn default_available_actions() -> Vec<ActionDescriptor> {
384    vec![
385        (
386            "mouse_move",
387            "Move the cursor to absolute desktop coordinates",
388            "desktop",
389        ),
390        (
391            "mouse_click",
392            "Click a mouse button, optionally after moving to coordinates",
393            "desktop",
394        ),
395        (
396            "mouse_drag",
397            "Drag the mouse from one coordinate to another",
398            "desktop",
399        ),
400        ("key_press", "Press a single key", "desktop"),
401        (
402            "type_text",
403            "Type raw text into the focused input",
404            "desktop",
405        ),
406        ("hotkey", "Press a combination of keys in order", "desktop"),
407        ("scroll", "Scroll the active window or surface", "desktop"),
408        (
409            "open_app",
410            "Launch an application command inside the sandbox session",
411            "system",
412        ),
413        (
414            "focus_window",
415            "Attempt to focus a known X11 window id",
416            "desktop",
417        ),
418        ("resize_window", "Resize and move an X11 window", "desktop"),
419        (
420            "run_command",
421            "Run a shell command within the sandbox",
422            "system",
423        ),
424        (
425            "read_file",
426            "Read a file from the sandbox filesystem",
427            "filesystem",
428        ),
429        (
430            "write_file",
431            "Write a file in the sandbox filesystem",
432            "filesystem",
433        ),
434        (
435            "browser_open",
436            "Open a URL with the active browser adapter",
437            "browser",
438        ),
439        (
440            "browser_get_dom",
441            "Return the current DOM snapshot",
442            "browser",
443        ),
444        (
445            "browser_click",
446            "Click using a selector or coordinates in browser mode",
447            "browser",
448        ),
449        (
450            "browser_type",
451            "Type using a selector in browser mode",
452            "browser",
453        ),
454        (
455            "browser_screenshot",
456            "Capture a browser-specific screenshot",
457            "browser",
458        ),
459    ]
460    .into_iter()
461    .map(|(name, description, category)| ActionDescriptor {
462        name: name.to_string(),
463        description: description.to_string(),
464        category: category.to_string(),
465        requires_approval: false,
466    })
467    .collect()
468}
469
470pub fn capability_descriptor(provider: &str, enrichments: Vec<String>) -> RuntimeCapabilities {
471    RuntimeCapabilities {
472        actions: default_available_actions(),
473        provider: provider.to_string(),
474        browser_mode: "playwright".to_string(),
475        vm_mode: if provider == "qemu" {
476            "qemu".to_string()
477        } else {
478            "xvfb-dev".to_string()
479        },
480        enrichments,
481    }
482}
483
484pub fn write_schema_bundle(out_dir: &Path) -> std::io::Result<()> {
485    fs::create_dir_all(out_dir)?;
486    let bundles = [
487        (
488            "action.schema.json",
489            serde_json::to_vec_pretty(&schema_for!(ActionRequest))?,
490        ),
491        (
492            "observation.schema.json",
493            serde_json::to_vec_pretty(&schema_for!(Observation))?,
494        ),
495        (
496            "error.schema.json",
497            serde_json::to_vec_pretty(&schema_for!(StructuredError))?,
498        ),
499        (
500            "task.schema.json",
501            serde_json::to_vec_pretty(&schema_for!(TaskStatus))?,
502        ),
503    ];
504    for (name, bytes) in bundles {
505        fs::write(out_dir.join(name), bytes)?;
506    }
507    Ok(())
508}
509
#[cfg(test)]
mod tests {
    use super::*;

    /// `action_name` returns the snake_case name for a variant.
    #[test]
    fn action_names_are_stable() {
        let action = ActionRequest::MouseMove {
            x: 1,
            y: 2,
            task_id: None,
        };
        assert_eq!(action.action_name(), "mouse_move");
    }

    /// An empty JSON object deserializes using every serde default.
    #[test]
    fn create_session_request_defaults_to_qemu_product_shape() {
        let request: CreateSessionRequest =
            serde_json::from_value(serde_json::json!({})).expect("default request");
        assert_eq!(request.provider, "qemu");
        assert_eq!(request.width, 1440);
        assert_eq!(request.height, 900);
        assert_eq!(request.qemu_profile, None);
    }

    /// Every file of the schema bundle is written, not only the first one.
    #[test]
    fn schema_bundle_writes() {
        let temp = tempfile::tempdir().expect("tempdir");
        write_schema_bundle(temp.path()).expect("write schemas");
        for name in [
            "action.schema.json",
            "observation.schema.json",
            "error.schema.json",
            "task.schema.json",
        ] {
            assert!(temp.path().join(name).exists(), "missing {name}");
        }
    }

    /// Explicit dimensions are kept while the omitted provider falls back to
    /// its default.
    #[test]
    fn create_session_request_defaults_to_qemu_provider() {
        let request: CreateSessionRequest =
            serde_json::from_str(r#"{"width": 1280, "height": 720}"#).expect("request");
        assert_eq!(request.provider, "qemu");
        assert_eq!(request.width, 1280);
        assert_eq!(request.height, 720);
    }
}