Skip to main content

vmette_proto/
daemon.rs

1//! The **`vmetted` UNIX-socket protocol**: line-delimited JSON, one request
2//! object in, one-or-more reply objects out. Two independent request/reply
3//! pairs share the socket:
4//!
5//! * **Stateless run** — [`Request`] in, a stream of [`Frame`]s out. The daemon
6//!   boots a one-shot capture-aware `vmette::Session` in-process and streams its
7//!   guest output. This object carries no `kind` tag; the daemon routes to it as
8//!   the default.
9//! * **Stateful desktop** — a [`DesktopRequest`] in (internally tagged by
10//!   `kind`), a single [`DesktopReply`] out. These drive live, persistent
11//!   desktop sessions held in the daemon's session registry.
12//!
13//! The desktop reply payloads are standalone structs ([`ActionReply`],
14//! [`SettleReply`], …) that double as the [`DesktopReply`] variants, so the
15//! daemon builds them and a client reads them back as the *same* Rust types.
16//!
17//! Fields with a server-side default are modelled as [`Option`] and skipped on
18//! the wire when absent: a client expresses "unspecified" as `None`, and the
19//! daemon owns the one true default. The stateless [`Request`] follows the same
20//! rule — its optional fields map to a `vmette::Config` only when set (the
21//! daemon runs the workload in-process via `Config::from_run_request`).
22
23use std::path::PathBuf;
24
25use serde::{Deserialize, Serialize};
26
27use crate::agent::Action;
28use crate::geom::Rect;
29use crate::mount::ShareMount;
30
31// ---- stateless run path -------------------------------------------------
32
33/// One stateless run request: boot a one-shot microVM, relay its output. The
34/// daemon and the MCP server map this to a `vmette::Config` and run it
35/// in-process. Carries no `kind` tag.
36///
37/// Fields with a binary-side default are modelled as [`Option`] and left unset
38/// when `None`, so the consumer applies the one true default and no value is
39/// spelled twice. `kernel`, `initramfs`, `rootfs`, and
40/// `exec` are always required.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct Request {
43    pub kernel: PathBuf,
44    pub initramfs: PathBuf,
45    /// Rootfs spec dispatched through the CLI's provider registry.
46    /// See `vmette providers` for valid forms (path, image ref, tar+...).
47    pub rootfs: String,
48    #[serde(default)]
49    pub rootfs_ro: bool,
50    #[serde(default)]
51    pub offline: bool,
52    #[serde(default)]
53    pub shares: Vec<ShareMount>,
54    #[serde(default)]
55    pub disks: Vec<PathBuf>,
56    pub exec: String,
57    #[serde(default)]
58    pub net: bool,
59    #[serde(default)]
60    pub switch_root: bool,
61    /// vsock port: -1 disable, 0 auto, >0 fixed. `None` → CLI default (auto).
62    #[serde(default, skip_serializing_if = "Option::is_none")]
63    pub vsock_port: Option<i32>,
64    #[serde(default, skip_serializing_if = "Option::is_none")]
65    pub guest_vsock_port: Option<u32>,
66    #[serde(default, skip_serializing_if = "Option::is_none")]
67    pub timeout_seconds: Option<u32>,
68    #[serde(default, skip_serializing_if = "Option::is_none")]
69    pub vcpus: Option<u8>,
70    #[serde(default, skip_serializing_if = "Option::is_none")]
71    pub mem_mib: Option<u64>,
72    /// Ephemeral ext4 scratch disk size in MiB for the writable overlay upper
73    /// (the CLI's `--scratch`). `None` → no scratch disk (RAM-backed tmpfs
74    /// overlay). Rendered as a bare-MiB `--scratch <n>`.
75    #[serde(default, skip_serializing_if = "Option::is_none")]
76    pub scratch_mib: Option<u64>,
77}
78
79/// One streamed reply line from the stateless run path. The daemon emits many
80/// `Stdout` frames (the guest's combined output) followed by a terminal `Exit`
81/// (or `Error`). `Stderr` remains in the protocol for compatibility but the
82/// in-process run lane folds guest stderr into `Stdout`.
83#[derive(Debug, Clone, Serialize, Deserialize)]
84#[serde(tag = "kind", rename_all = "lowercase")]
85pub enum Frame {
86    Stdout { data: String },
87    Stderr { data: String },
88    Exit { code: i32 },
89    Error { message: String },
90}
91
92// ---- stateful desktop path: requests ------------------------------------
93
94/// A desktop request, internally tagged by `kind`. The daemon routes desktop
95/// connections here; each variant's payload is a standalone struct.
96#[derive(Debug, Clone, Serialize, Deserialize)]
97#[serde(tag = "kind", rename_all = "snake_case")]
98pub enum DesktopRequest {
99    /// Boot a persistent desktop VM. `image` is resolved client-side; the
100    /// remaining defaulted fields (`vcpus`, `mem_mib`, `size`) are filled by
101    /// the daemon when absent.
102    DesktopStart(DesktopStart),
103    /// Run one computer-use action against a live session.
104    DesktopAction(DesktopAction),
105    /// Poll until the desktop stops changing, then return that frame plus the
106    /// regions still moving.
107    DesktopScreenshotSettled(DesktopScreenshotSettled),
108    /// Capture one frame and report what moved since the previous capture.
109    DesktopWhatChanged(DesktopWhatChanged),
110    /// Start (or look up) a live VNC view of the session and return the
111    /// loopback address a VNC client connects to. Idempotent.
112    DesktopView(DesktopView),
113    /// Tear a live session down.
114    DesktopStop(DesktopStop),
115}
116
117/// Payload of [`DesktopRequest::DesktopStart`]. The kernel + initramfs are the
118/// ordinary vmette assets; desktop-ness comes from `image` + the Agent
119/// workload. `None` optional fields take the daemon's defaults.
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct DesktopStart {
122    pub kernel: PathBuf,
123    pub initramfs: PathBuf,
124    /// OCI/tar/path rootfs spec. Resolved client-side (explicit `--image` →
125    /// `$VMETTE_DESKTOP_IMAGE` → local `vmette-desktop-rootfs.tar` → registry
126    /// fallback) exactly like kernel/initramfs, so the daemon receives a
127    /// concrete spec and owns no desktop-image default.
128    pub image: String,
129    /// "WIDTHxHEIGHT"; daemon defaults to 1280x800 when absent/unparseable.
130    #[serde(default, skip_serializing_if = "Option::is_none")]
131    pub size: Option<String>,
132    #[serde(default)]
133    pub net: bool,
134    #[serde(default)]
135    pub offline: bool,
136    /// Host directories mounted into the desktop VM at `/mnt/<tag>`.
137    #[serde(default, skip_serializing_if = "Vec::is_empty")]
138    pub shares: Vec<ShareMount>,
139    #[serde(default, skip_serializing_if = "Option::is_none")]
140    pub vcpus: Option<u8>,
141    #[serde(default, skip_serializing_if = "Option::is_none")]
142    pub mem_mib: Option<u64>,
143}
144
145/// Payload of [`DesktopRequest::DesktopAction`].
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct DesktopAction {
148    pub session_id: String,
149    pub action: Action,
150}
151
152/// Payload of [`DesktopRequest::DesktopScreenshotSettled`].
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct DesktopScreenshotSettled {
155    pub session_id: String,
156    /// Max time to wait for the screen to settle before returning the latest
157    /// frame anyway (with `settled: false`). Daemon defaults to 10s.
158    #[serde(default, skip_serializing_if = "Option::is_none")]
159    pub timeout_ms: Option<u64>,
160    /// How long the screen must stay continuously settled before the frame is
161    /// returned. Bridges the quiescent gap a network-bound app shows between
162    /// painting its chrome and its content: a transient settle (a blank page
163    /// mid-load) is interrupted when content paints and so does not satisfy the
164    /// hold, while a video/spinner is excluded as churn and never resets it.
165    /// Daemon defaults to a small confirmation hold; `desktop_launch` passes a
166    /// larger one.
167    #[serde(default, skip_serializing_if = "Option::is_none")]
168    pub stable_hold_ms: Option<u64>,
169}
170
171/// Payload of [`DesktopRequest::DesktopWhatChanged`].
172#[derive(Debug, Clone, Serialize, Deserialize)]
173pub struct DesktopWhatChanged {
174    pub session_id: String,
175}
176
177/// Payload of [`DesktopRequest::DesktopView`].
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct DesktopView {
180    pub session_id: String,
181}
182
183/// Payload of [`DesktopRequest::DesktopStop`].
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct DesktopStop {
186    pub session_id: String,
187}
188
189// ---- stateful desktop path: replies -------------------------------------
190
191/// A single desktop reply, internally tagged by `kind`. Each variant wraps a
192/// standalone payload struct the daemon builds and the client reads back.
193#[derive(Debug, Clone, Serialize, Deserialize)]
194#[serde(tag = "kind", rename_all = "snake_case")]
195pub enum DesktopReply {
196    Session(SessionReply),
197    ActionResult(ActionReply),
198    Settled(SettleReply),
199    Changed(ChangedReply),
200    View(ViewReply),
201    Stopped,
202    Error(ErrorReply),
203}
204
205/// Reply to `desktop_start`: the new session's id.
206#[derive(Debug, Clone, Serialize, Deserialize)]
207pub struct SessionReply {
208    pub session_id: String,
209}
210
211/// Reply to `desktop_action`: the agent's response-header fields plus an
212/// optional base64 PNG (present for `screenshot`).
213#[derive(Debug, Clone, Serialize, Deserialize)]
214pub struct ActionReply {
215    pub ok: bool,
216    #[serde(default, skip_serializing_if = "Option::is_none")]
217    pub error: Option<String>,
218    #[serde(default, skip_serializing_if = "Option::is_none")]
219    pub x: Option<i32>,
220    #[serde(default, skip_serializing_if = "Option::is_none")]
221    pub y: Option<i32>,
222    /// Base64 PNG for `screenshot`; absent otherwise.
223    #[serde(default, skip_serializing_if = "Option::is_none")]
224    pub png_base64: Option<String>,
225    /// Clipboard contents for `get_clipboard` (the response payload decoded as
226    /// UTF-8); absent for every other action.
227    #[serde(default, skip_serializing_if = "Option::is_none")]
228    pub text: Option<String>,
229    /// Exit status for `exec_capture` (`None` ⇒ the command did not exit
230    /// cleanly, e.g. it timed out); absent for every other action. The
231    /// command's combined stdout/stderr is returned in `text`.
232    #[serde(default, skip_serializing_if = "Option::is_none")]
233    pub exit_code: Option<i32>,
234}
235
236/// Reply to `desktop_screenshot_settled`: the captured frame, whether it
237/// actually settled (vs. timed out), and the regions still moving.
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct SettleReply {
240    pub settled: bool,
241    pub moving: Vec<Rect>,
242    pub png_base64: String,
243}
244
245/// Reply to `desktop_what_changed`: a fresh frame and the damage box (absent
246/// when nothing changed since the previous capture).
247#[derive(Debug, Clone, Serialize, Deserialize)]
248pub struct ChangedReply {
249    #[serde(default, skip_serializing_if = "Option::is_none")]
250    pub changed: Option<Rect>,
251    pub png_base64: String,
252}
253
254/// Reply to `desktop_view`: the loopback `host:port` a VNC client connects to
255/// for a live, interactive view of the session (e.g. `127.0.0.1:5901`). Bound
256/// to loopback only; the view streams the agent's screen and forwards a human
257/// viewer's pointer/keyboard back as computer-use actions.
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct ViewReply {
260    pub addr: String,
261}
262
263/// Reply carrying a daemon-side error message (any failed request).
264#[derive(Debug, Clone, Serialize, Deserialize)]
265pub struct ErrorReply {
266    pub message: String,
267}
268
#[cfg(test)]
mod tests {
    // Wire-format tests. Each one pins the exact JSON a protocol type reads or
    // writes, so an accidental change to a serde attribute shows up here.
    use super::*;

    /// A request carrying only the four required keys must deserialize, with
    /// every `Option` left as `None` and every `#[serde(default)]` flag or list
    /// at its empty default.
    #[test]
    fn run_request_leaves_unset_optionals_none() {
        let req: Request =
            serde_json::from_str(r#"{"kernel":"/k","initramfs":"/i","rootfs":"/r","exec":"echo"}"#)
                .unwrap();
        assert_eq!(req.vsock_port, None);
        assert_eq!(req.guest_vsock_port, None);
        assert_eq!(req.timeout_seconds, None);
        assert_eq!(req.vcpus, None);
        assert_eq!(req.mem_mib, None);
        assert_eq!(req.scratch_mib, None);
        assert!(!req.rootfs_ro);
        assert!(!req.offline);
        assert!(!req.net);
        assert!(!req.switch_root);
        assert!(req.shares.is_empty());
        assert!(req.disks.is_empty());
    }

    /// `Frame` variants are tagged with a lowercase `kind`.
    #[test]
    fn frame_tags_are_lowercase() {
        let j = serde_json::to_string(&Frame::Exit { code: 0 }).unwrap();
        assert_eq!(j, r#"{"kind":"exit","code":0}"#);
    }

    /// The `kind` tag selects the `DesktopRequest` variant.
    #[test]
    fn desktop_request_deserializes_by_kind() {
        let r: DesktopRequest =
            serde_json::from_str(r#"{"kind":"desktop_stop","session_id":"abc"}"#).unwrap();
        match r {
            DesktopRequest::DesktopStop(s) => assert_eq!(s.session_id, "abc"),
            other => panic!("wrong variant: {other:?}"),
        }
    }

    /// The nested `action` object deserializes into the typed `Action`.
    #[test]
    fn desktop_action_carries_typed_action() {
        let r: DesktopRequest = serde_json::from_str(
            r#"{"kind":"desktop_action","session_id":"s","action":{"action":"left_click"}}"#,
        )
        .unwrap();
        match r {
            DesktopRequest::DesktopAction(a) => {
                assert_eq!(a.session_id, "s");
                assert_eq!(a.action, Action::LeftClick);
            }
            other => panic!("wrong variant: {other:?}"),
        }
    }

    /// Unset optionals and an empty `shares` list never reach the wire.
    #[test]
    fn desktop_start_omits_unset_optionals() {
        let j = serde_json::to_string(&DesktopRequest::DesktopStart(DesktopStart {
            kernel: "/k".into(),
            initramfs: "/i".into(),
            image: "alpine:3.20".into(),
            size: None,
            net: true,
            offline: false,
            shares: Vec::new(),
            vcpus: None,
            mem_mib: None,
        }))
        .unwrap();
        // kind + the always-present fields (image is required, resolved
        // client-side); size/vcpus/mem_mib stay omitted when None.
        assert_eq!(
            j,
            r#"{"kind":"desktop_start","kernel":"/k","initramfs":"/i","image":"alpine:3.20","net":true,"offline":false}"#
        );
    }

    /// The payload's fields sit next to `kind`, not in a nested object.
    #[test]
    fn reply_session_flattens_under_kind() {
        let j = serde_json::to_string(&DesktopReply::Session(SessionReply {
            session_id: "deadbeef".into(),
        }))
        .unwrap();
        assert_eq!(j, r#"{"kind":"session","session_id":"deadbeef"}"#);
    }

    /// With every optional unset, only `kind` and `ok` are emitted.
    #[test]
    fn reply_action_omits_none_fields() {
        let j = serde_json::to_string(&DesktopReply::ActionResult(ActionReply {
            ok: true,
            error: None,
            x: None,
            y: None,
            png_base64: None,
            text: None,
            exit_code: None,
        }))
        .unwrap();
        assert_eq!(j, r#"{"kind":"action_result","ok":true}"#);
    }

    /// `moving` is emitted as an array of rect objects.
    #[test]
    fn reply_settled_carries_moving_rects() {
        let j = serde_json::to_string(&DesktopReply::Settled(SettleReply {
            settled: true,
            moving: vec![Rect {
                x: 1,
                y: 2,
                w: 3,
                h: 4,
            }],
            png_base64: "AA".into(),
        }))
        .unwrap();
        assert_eq!(
            j,
            r#"{"kind":"settled","settled":true,"moving":[{"x":1,"y":2,"w":3,"h":4}],"png_base64":"AA"}"#
        );
    }

    /// A `None` damage box is omitted rather than written as `null`.
    #[test]
    fn reply_changed_omits_absent_damage() {
        let j = serde_json::to_string(&DesktopReply::Changed(ChangedReply {
            changed: None,
            png_base64: "AA".into(),
        }))
        .unwrap();
        assert_eq!(j, r#"{"kind":"changed","png_base64":"AA"}"#);
    }

    /// Deserializing a `desktop_view` request and serializing it again must
    /// reproduce the original wire text.
    #[test]
    fn desktop_view_request_round_trips() {
        let wire = r#"{"kind":"desktop_view","session_id":"s"}"#;
        let r: DesktopRequest = serde_json::from_str(wire).unwrap();
        match &r {
            DesktopRequest::DesktopView(v) => assert_eq!(v.session_id, "s"),
            other => panic!("wrong variant: {other:?}"),
        }
        assert_eq!(serde_json::to_string(&r).unwrap(), wire);
    }

    /// `ViewReply` flattens under `kind` and reads back as the same variant.
    #[test]
    fn reply_view_flattens_under_kind() {
        let j = serde_json::to_string(&DesktopReply::View(ViewReply {
            addr: "127.0.0.1:5901".into(),
        }))
        .unwrap();
        assert_eq!(j, r#"{"kind":"view","addr":"127.0.0.1:5901"}"#);
        let back: DesktopReply = serde_json::from_str(&j).unwrap();
        match back {
            DesktopReply::View(v) => assert_eq!(v.addr, "127.0.0.1:5901"),
            other => panic!("wrong variant: {other:?}"),
        }
    }

    /// The payload-free `Stopped` variant is just the `kind` tag on the wire
    /// and reads back as the same variant.
    #[test]
    fn reply_stopped_round_trips() {
        let j = serde_json::to_string(&DesktopReply::Stopped).unwrap();
        assert_eq!(j, r#"{"kind":"stopped"}"#);
        let back: DesktopReply = serde_json::from_str(&j).unwrap();
        assert!(
            matches!(back, DesktopReply::Stopped),
            "wrong variant: {back:?}"
        );
    }

    /// `ErrorReply` flattens under `kind` and reads back as the same variant.
    #[test]
    fn reply_error_round_trips() {
        let j = serde_json::to_string(&DesktopReply::Error(ErrorReply {
            message: "boom".into(),
        }))
        .unwrap();
        assert_eq!(j, r#"{"kind":"error","message":"boom"}"#);
        let back: DesktopReply = serde_json::from_str(&j).unwrap();
        match back {
            DesktopReply::Error(e) => assert_eq!(e.message, "boom"),
            other => panic!("wrong variant: {other:?}"),
        }
    }
}