1use std::collections::BTreeMap;
2use std::fs;
3use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use schemars::{JsonSchema, schema_for};
7use serde::{Deserialize, Serialize};
8use serde_json::{Value, json};
9use uuid::Uuid;
10
11#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
12#[serde(rename_all = "snake_case")]
13pub enum MouseButton {
14 Left,
15 Middle,
16 Right,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
20pub struct Bounds {
21 pub x: i32,
22 pub y: i32,
23 pub width: u32,
24 pub height: u32,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
28#[serde(tag = "kind", rename_all = "snake_case")]
29pub enum ActionRequest {
30 MouseMove {
31 x: i32,
32 y: i32,
33 #[serde(default)]
34 task_id: Option<String>,
35 },
36 MouseClick {
37 button: Option<MouseButton>,
38 x: Option<i32>,
39 y: Option<i32>,
40 #[serde(default)]
41 task_id: Option<String>,
42 },
43 MouseDrag {
44 start_x: i32,
45 start_y: i32,
46 end_x: i32,
47 end_y: i32,
48 #[serde(default)]
49 task_id: Option<String>,
50 },
51 KeyPress {
52 key: String,
53 #[serde(default)]
54 task_id: Option<String>,
55 },
56 TypeText {
57 text: String,
58 #[serde(default)]
59 task_id: Option<String>,
60 },
61 Hotkey {
62 keys: Vec<String>,
63 #[serde(default)]
64 task_id: Option<String>,
65 },
66 Scroll {
67 delta_x: i32,
68 delta_y: i32,
69 #[serde(default)]
70 task_id: Option<String>,
71 },
72 OpenApp {
73 name: String,
74 #[serde(default)]
75 task_id: Option<String>,
76 },
77 FocusWindow {
78 window_id: String,
79 #[serde(default)]
80 task_id: Option<String>,
81 },
82 ResizeWindow {
83 window_id: String,
84 bounds: Bounds,
85 #[serde(default)]
86 task_id: Option<String>,
87 },
88 RunCommand {
89 command: String,
90 cwd: Option<String>,
91 env: Option<BTreeMap<String, String>>,
92 #[serde(default)]
93 task_id: Option<String>,
94 },
95 ReadFile {
96 path: String,
97 #[serde(default)]
98 task_id: Option<String>,
99 },
100 WriteFile {
101 path: String,
102 contents: String,
103 #[serde(default)]
104 task_id: Option<String>,
105 },
106 BrowserOpen {
107 url: String,
108 #[serde(default)]
109 task_id: Option<String>,
110 },
111 BrowserGetDom {
112 #[serde(default)]
113 task_id: Option<String>,
114 },
115 BrowserClick {
116 selector: Option<String>,
117 x: Option<i32>,
118 y: Option<i32>,
119 button: Option<MouseButton>,
120 #[serde(default)]
121 task_id: Option<String>,
122 },
123 BrowserType {
124 selector: Option<String>,
125 text: String,
126 #[serde(default)]
127 task_id: Option<String>,
128 },
129 BrowserScreenshot {
130 #[serde(default)]
131 task_id: Option<String>,
132 },
133}
134
135impl ActionRequest {
136 pub fn action_name(&self) -> &'static str {
137 match self {
138 Self::MouseMove { .. } => "mouse_move",
139 Self::MouseClick { .. } => "mouse_click",
140 Self::MouseDrag { .. } => "mouse_drag",
141 Self::KeyPress { .. } => "key_press",
142 Self::TypeText { .. } => "type_text",
143 Self::Hotkey { .. } => "hotkey",
144 Self::Scroll { .. } => "scroll",
145 Self::OpenApp { .. } => "open_app",
146 Self::FocusWindow { .. } => "focus_window",
147 Self::ResizeWindow { .. } => "resize_window",
148 Self::RunCommand { .. } => "run_command",
149 Self::ReadFile { .. } => "read_file",
150 Self::WriteFile { .. } => "write_file",
151 Self::BrowserOpen { .. } => "browser_open",
152 Self::BrowserGetDom { .. } => "browser_get_dom",
153 Self::BrowserClick { .. } => "browser_click",
154 Self::BrowserType { .. } => "browser_type",
155 Self::BrowserScreenshot { .. } => "browser_screenshot",
156 }
157 }
158
159 pub fn task_id(&self) -> Option<&str> {
160 match self {
161 Self::MouseMove { task_id, .. }
162 | Self::MouseClick { task_id, .. }
163 | Self::MouseDrag { task_id, .. }
164 | Self::KeyPress { task_id, .. }
165 | Self::TypeText { task_id, .. }
166 | Self::Hotkey { task_id, .. }
167 | Self::Scroll { task_id, .. }
168 | Self::OpenApp { task_id, .. }
169 | Self::FocusWindow { task_id, .. }
170 | Self::ResizeWindow { task_id, .. }
171 | Self::RunCommand { task_id, .. }
172 | Self::ReadFile { task_id, .. }
173 | Self::WriteFile { task_id, .. }
174 | Self::BrowserOpen { task_id, .. }
175 | Self::BrowserGetDom { task_id }
176 | Self::BrowserClick { task_id, .. }
177 | Self::BrowserType { task_id, .. }
178 | Self::BrowserScreenshot { task_id } => task_id.as_deref(),
179 }
180 }
181}
182
183#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
184pub struct ArtifactRef {
185 pub kind: String,
186 pub path: String,
187 pub mime_type: Option<String>,
188}
189
190#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
191pub struct StructuredError {
192 pub code: String,
193 pub message: String,
194 pub retryable: bool,
195 pub category: String,
196 pub details: Value,
197 #[serde(default)]
198 pub artifact_refs: Vec<ArtifactRef>,
199}
200
201#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
202pub struct ActionReceipt {
203 pub status: String,
204 pub receipt_id: String,
205 pub action_type: String,
206 pub started_at: DateTime<Utc>,
207 pub completed_at: DateTime<Utc>,
208 pub result: Value,
209 #[serde(default)]
210 pub artifacts: Vec<ArtifactRef>,
211 pub error: Option<StructuredError>,
212}
213
214impl ActionReceipt {
215 pub fn success(
216 action_type: &str,
217 started_at: DateTime<Utc>,
218 result: Value,
219 artifacts: Vec<ArtifactRef>,
220 ) -> Self {
221 Self {
222 status: "ok".to_string(),
223 receipt_id: Uuid::new_v4().to_string(),
224 action_type: action_type.to_string(),
225 started_at,
226 completed_at: Utc::now(),
227 result,
228 artifacts,
229 error: None,
230 }
231 }
232
233 pub fn failure(action_type: &str, started_at: DateTime<Utc>, error: StructuredError) -> Self {
234 Self {
235 status: "error".to_string(),
236 receipt_id: Uuid::new_v4().to_string(),
237 action_type: action_type.to_string(),
238 started_at,
239 completed_at: Utc::now(),
240 result: json!({}),
241 artifacts: error.artifact_refs.clone(),
242 error: Some(error),
243 }
244 }
245}
246
247#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
248pub struct ScreenshotData {
249 pub mime_type: String,
250 #[serde(default)]
251 pub data_base64: Option<String>,
252 pub width: Option<u32>,
253 pub height: Option<u32>,
254 pub artifact_path: Option<String>,
255}
256
257#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
258pub struct WindowMetadata {
259 pub id: Option<String>,
260 pub title: Option<String>,
261 pub class_name: Option<String>,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
265pub struct CursorPosition {
266 pub x: i32,
267 pub y: i32,
268 pub screen: Option<String>,
269}
270
271#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
272pub struct BrowserSnapshot {
273 pub current_url: Option<String>,
274 pub title: Option<String>,
275 pub dom_html: Option<String>,
276 #[serde(default)]
277 pub console_logs: Vec<String>,
278 #[serde(default)]
279 pub network_events: Vec<String>,
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
283pub struct Observation {
284 pub captured_at: DateTime<Utc>,
285 pub screenshot: ScreenshotData,
286 pub active_window: Option<WindowMetadata>,
287 pub cursor_position: Option<CursorPosition>,
288 #[serde(default)]
289 pub capability_flags: Vec<String>,
290 pub browser: Option<BrowserSnapshot>,
291 pub raw: Value,
292 pub summary: Value,
293}
294
295#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
296pub struct ActionDescriptor {
297 pub name: String,
298 pub description: String,
299 pub category: String,
300 pub requires_approval: bool,
301}
302
303#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
304pub struct RuntimeCapabilities {
305 pub actions: Vec<ActionDescriptor>,
306 pub provider: String,
307 pub browser_mode: String,
308 pub vm_mode: String,
309 #[serde(default)]
310 pub enrichments: Vec<String>,
311}
312
313#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
314pub struct TaskStatus {
315 pub task_id: String,
316 pub state: String,
317 pub paused: bool,
318 pub approval_required: bool,
319 pub current_goal: Option<String>,
320}
321
322#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
323pub struct CreateSessionRequest {
324 #[serde(default = "default_provider")]
325 pub provider: String,
326 #[serde(default = "default_width")]
327 pub width: u32,
328 #[serde(default = "default_height")]
329 pub height: u32,
330 pub display: Option<String>,
331 pub browser_command: Option<String>,
332 pub boot: Option<String>,
333 pub container_image: Option<String>,
334 pub disable_kvm: Option<bool>,
335 pub qemu_profile: Option<String>,
336 pub shared_host_path: Option<String>,
337}
338
339#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
340pub struct SessionRecord {
341 pub id: String,
342 pub provider: String,
343 pub qemu_profile: Option<String>,
344 pub display: Option<String>,
345 pub width: u32,
346 pub height: u32,
347 pub state: String,
348 pub created_at: DateTime<Utc>,
349 pub artifacts_dir: String,
350 #[serde(default)]
351 pub capabilities: Vec<String>,
352 pub browser_command: Option<String>,
353 pub runtime_base_url: Option<String>,
354 pub viewer_url: Option<String>,
355 pub live_desktop_view: Option<LiveDesktopView>,
356 pub bridge_status: Option<String>,
357 pub readiness_state: Option<String>,
358 pub bridge_error: Option<StructuredError>,
359}
360
361#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
362pub struct LiveDesktopView {
363 pub mode: String,
364 pub status: String,
365 pub provider_surface: String,
366 pub matches_action_plane: bool,
367 pub canonical_url: Option<String>,
368 pub debug_url: Option<String>,
369 pub reason: Option<String>,
370 pub refresh_interval_ms: Option<u64>,
371}
372
/// Serde default for `CreateSessionRequest::provider`: `"qemu"`.
fn default_provider() -> String {
    String::from("qemu")
}
/// Serde default for `CreateSessionRequest::width`.
fn default_width() -> u32 {
    const WIDTH: u32 = 1440;
    WIDTH
}
/// Serde default for `CreateSessionRequest::height`.
fn default_height() -> u32 {
    const HEIGHT: u32 = 900;
    HEIGHT
}
382
383pub fn default_available_actions() -> Vec<ActionDescriptor> {
384 vec![
385 (
386 "mouse_move",
387 "Move the cursor to absolute desktop coordinates",
388 "desktop",
389 ),
390 (
391 "mouse_click",
392 "Click a mouse button, optionally after moving to coordinates",
393 "desktop",
394 ),
395 (
396 "mouse_drag",
397 "Drag the mouse from one coordinate to another",
398 "desktop",
399 ),
400 ("key_press", "Press a single key", "desktop"),
401 (
402 "type_text",
403 "Type raw text into the focused input",
404 "desktop",
405 ),
406 ("hotkey", "Press a combination of keys in order", "desktop"),
407 ("scroll", "Scroll the active window or surface", "desktop"),
408 (
409 "open_app",
410 "Launch an application command inside the sandbox session",
411 "system",
412 ),
413 (
414 "focus_window",
415 "Attempt to focus a known X11 window id",
416 "desktop",
417 ),
418 ("resize_window", "Resize and move an X11 window", "desktop"),
419 (
420 "run_command",
421 "Run a shell command within the sandbox",
422 "system",
423 ),
424 (
425 "read_file",
426 "Read a file from the sandbox filesystem",
427 "filesystem",
428 ),
429 (
430 "write_file",
431 "Write a file in the sandbox filesystem",
432 "filesystem",
433 ),
434 (
435 "browser_open",
436 "Open a URL with the active browser adapter",
437 "browser",
438 ),
439 (
440 "browser_get_dom",
441 "Return the current DOM snapshot",
442 "browser",
443 ),
444 (
445 "browser_click",
446 "Click using a selector or coordinates in browser mode",
447 "browser",
448 ),
449 (
450 "browser_type",
451 "Type using a selector in browser mode",
452 "browser",
453 ),
454 (
455 "browser_screenshot",
456 "Capture a browser-specific screenshot",
457 "browser",
458 ),
459 ]
460 .into_iter()
461 .map(|(name, description, category)| ActionDescriptor {
462 name: name.to_string(),
463 description: description.to_string(),
464 category: category.to_string(),
465 requires_approval: false,
466 })
467 .collect()
468}
469
470pub fn capability_descriptor(provider: &str, enrichments: Vec<String>) -> RuntimeCapabilities {
471 RuntimeCapabilities {
472 actions: default_available_actions(),
473 provider: provider.to_string(),
474 browser_mode: "playwright".to_string(),
475 vm_mode: if provider == "qemu" {
476 "qemu".to_string()
477 } else {
478 "xvfb-dev".to_string()
479 },
480 enrichments,
481 }
482}
483
484pub fn write_schema_bundle(out_dir: &Path) -> std::io::Result<()> {
485 fs::create_dir_all(out_dir)?;
486 let bundles = [
487 (
488 "action.schema.json",
489 serde_json::to_vec_pretty(&schema_for!(ActionRequest))?,
490 ),
491 (
492 "observation.schema.json",
493 serde_json::to_vec_pretty(&schema_for!(Observation))?,
494 ),
495 (
496 "error.schema.json",
497 serde_json::to_vec_pretty(&schema_for!(StructuredError))?,
498 ),
499 (
500 "task.schema.json",
501 serde_json::to_vec_pretty(&schema_for!(TaskStatus))?,
502 ),
503 ];
504 for (name, bytes) in bundles {
505 fs::write(out_dir.join(name), bytes)?;
506 }
507 Ok(())
508}
509
#[cfg(test)]
mod tests {
    use super::*;

    /// `action_name()` must report the documented name and agree with the
    /// `kind` tag serde writes for the same value.
    #[test]
    fn action_names_are_stable() {
        let action = ActionRequest::MouseMove {
            x: 1,
            y: 2,
            task_id: None,
        };
        assert_eq!(action.action_name(), "mouse_move");
        assert_eq!(action.task_id(), None);

        // Tie the hand-written name table to the serialized tag.
        let encoded = serde_json::to_value(&action).expect("serialize action");
        assert_eq!(encoded["kind"], action.action_name());
    }

    /// A tagged JSON request round-trips into the matching variant and
    /// exposes its task id through `task_id()`.
    #[test]
    fn tagged_action_deserializes_with_task_id() {
        let action: ActionRequest = serde_json::from_value(serde_json::json!({
            "kind": "browser_get_dom",
            "task_id": "t-1",
        }))
        .expect("tagged action");
        assert_eq!(action.action_name(), "browser_get_dom");
        assert_eq!(action.task_id(), Some("t-1"));
    }

    /// An empty request body picks up every declared default.
    #[test]
    fn create_session_request_defaults_to_qemu_product_shape() {
        let request: CreateSessionRequest =
            serde_json::from_value(serde_json::json!({})).expect("default request");
        assert_eq!(request.provider, "qemu");
        assert_eq!(request.width, 1440);
        assert_eq!(request.height, 900);
        assert_eq!(request.qemu_profile, None);
    }

    /// Every file in the bundle must be written, not just the first one.
    #[test]
    fn schema_bundle_writes() {
        let temp = tempfile::tempdir().expect("tempdir");
        write_schema_bundle(temp.path()).expect("write schemas");
        for file_name in [
            "action.schema.json",
            "observation.schema.json",
            "error.schema.json",
            "task.schema.json",
        ] {
            assert!(
                temp.path().join(file_name).exists(),
                "missing schema file: {file_name}"
            );
        }
    }

    /// Explicit geometry is kept while the omitted provider still defaults.
    #[test]
    fn create_session_request_defaults_to_qemu_provider() {
        let request: CreateSessionRequest =
            serde_json::from_str(r#"{"width": 1280, "height": 720}"#).expect("request");
        assert_eq!(request.provider, "qemu");
        assert_eq!(request.width, 1280);
        assert_eq!(request.height, 720);
    }
}