Skip to main content

cellos_host_firecracker/
api_client.rs

1//! Minimal Firecracker Management API client (HTTP/1.1 over Unix domain socket).
2//!
3//! Firecracker exposes a REST API on a Unix socket created at startup. All
4//! configuration calls are synchronous `PUT` requests that return 204 No Content
5//! on success, or a JSON error body on failure.
6//!
//! This client is intentionally minimal — it covers only the paths needed for
//! the L2-06 lifecycle (machine-config, boot-source, drives, vsock,
//! network-interfaces, actions, `PATCH /vm`, and snapshot create/load).
9//!
10//! # Platform support
11//!
12//! The HTTP-over-Unix-socket client (`FirecrackerApiClient`) is Linux-only —
13//! Firecracker itself only runs on Linux KVM, and the underlying transport
14//! (`tokio::net::UnixStream`) is unavailable on Windows. The pure-data request
15//! body types (`MachineConfig`, `BootSource`, …) are cross-platform so the
16//! workspace can `cargo check` cleanly on every host.
17
18use std::path::PathBuf;
19
20use serde::Serialize;
21
22#[cfg(target_os = "linux")]
23use std::time::Duration;
24
25#[cfg(target_os = "linux")]
26use bytes::Bytes;
27#[cfg(target_os = "linux")]
28use http_body_util::Full;
29#[cfg(target_os = "linux")]
30use hyper::body::Incoming;
31#[cfg(target_os = "linux")]
32use hyper::client::conn::http1;
33#[cfg(target_os = "linux")]
34use hyper::{Method, Request, Response, StatusCode};
35#[cfg(target_os = "linux")]
36use hyper_util::rt::TokioIo;
37#[cfg(target_os = "linux")]
38use tokio::net::UnixStream;
39#[cfg(target_os = "linux")]
40use tokio::time::timeout;
41#[cfg(target_os = "linux")]
42use tracing::instrument;
43
44#[cfg(target_os = "linux")]
45use cellos_core::CellosError;
46
/// Upper bound on how long we wait for a starting Firecracker process to
/// expose a connectable API socket.
#[cfg(target_os = "linux")]
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Budget for one complete API call. Requests travel over a local Unix
/// socket, so configuration calls are expected to finish quickly.
#[cfg(target_os = "linux")]
const REQUEST_TIMEOUT: Duration = Duration::from_secs(5);

/// Pause between two consecutive readiness probes of the API socket while
/// Firecracker is still starting up.
#[cfg(target_os = "linux")]
const SOCKET_POLL_INTERVAL: Duration = Duration::from_millis(50);
56
/// Handle to one Firecracker VMM, addressed by the path of its Unix API socket.
///
/// The type is declared on every platform so that code which merely names it
/// type-checks everywhere. The constructor and all request methods, however,
/// are compiled only on Linux (Firecracker is a Linux/KVM-only VMM), so a
/// value can never be constructed on other hosts.
///
/// The handle owns nothing but the socket path; cloning it copies that path
/// and does not share or duplicate any open connection.
#[derive(Clone, Debug)]
pub struct FirecrackerApiClient {
    /// Filesystem path of the VMM's API socket.
    // Only the Linux-gated `impl` reads this field. Silence the dead-code lint
    // on the targets where that `impl` is compiled out, and keep the lint
    // active on Linux so a genuinely unused field would still be reported.
    #[cfg_attr(not(target_os = "linux"), allow(dead_code))]
    socket_path: PathBuf,
}
69
70#[cfg(target_os = "linux")]
71impl FirecrackerApiClient {
72    pub fn new(socket_path: impl Into<PathBuf>) -> Self {
73        Self {
74            socket_path: socket_path.into(),
75        }
76    }
77
78    /// Wait up to [`CONNECT_TIMEOUT`] for the socket file to appear, then
79    /// verify connectivity with a `GET /` request (returns 404 — that's fine,
80    /// it proves the VMM is listening).
81    #[instrument(skip(self), fields(socket = %self.socket_path.display()))]
82    pub async fn wait_for_ready(&self) -> Result<(), CellosError> {
83        let deadline = tokio::time::Instant::now() + CONNECT_TIMEOUT;
84        loop {
85            if self.socket_path.exists() {
86                // Socket file appeared — try connecting.
87                if UnixStream::connect(&self.socket_path).await.is_ok() {
88                    tracing::debug!("firecracker socket ready");
89                    return Ok(());
90                }
91            }
92            if tokio::time::Instant::now() >= deadline {
93                return Err(CellosError::Host(format!(
94                    "timed out waiting for Firecracker socket at {}",
95                    self.socket_path.display()
96                )));
97            }
98            tokio::time::sleep(SOCKET_POLL_INTERVAL).await;
99        }
100    }
101
102    /// `PUT <path>` with a JSON body.  Returns the HTTP status code.
103    ///
104    /// Firecracker returns 204 No Content for successful configuration calls.
105    #[instrument(skip(self, body), fields(socket = %self.socket_path.display(), path = path))]
106    pub async fn put<T: Serialize>(&self, path: &str, body: &T) -> Result<StatusCode, CellosError> {
107        self.send_json(Method::PUT, path, body).await
108    }
109
110    /// `PATCH <path>` with a JSON body.  Returns the HTTP status code.
111    ///
112    /// Used by the L2-06 warm-pool path to drive `PATCH /vm` with
113    /// `{"state":"Paused"}` between InstanceStart and `PUT /snapshot/create`.
114    /// Firecracker rejects `PUT /snapshot/create` unless the VM is Paused —
115    /// snapshotting a Running VM would race with vCPU writes to RAM.
116    #[instrument(skip(self, body), fields(socket = %self.socket_path.display(), path = path))]
117    pub async fn patch<T: Serialize>(
118        &self,
119        path: &str,
120        body: &T,
121    ) -> Result<StatusCode, CellosError> {
122        self.send_json(Method::PATCH, path, body).await
123    }
124
125    async fn send_json<T: Serialize>(
126        &self,
127        method: Method,
128        path: &str,
129        body: &T,
130    ) -> Result<StatusCode, CellosError> {
131        let body_bytes = serde_json::to_vec(body)
132            .map_err(|e| CellosError::Host(format!("serialize firecracker request: {e}")))?;
133
134        let req = Request::builder()
135            .method(method)
136            .uri(format!("http://localhost{path}"))
137            .header("Content-Type", "application/json")
138            .header("Accept", "application/json")
139            .header("Host", "localhost")
140            .body(Full::new(Bytes::from(body_bytes)))
141            .map_err(|e| CellosError::Host(format!("build firecracker request: {e}")))?;
142
143        let status = timeout(REQUEST_TIMEOUT, self.send_request(req))
144            .await
145            .map_err(|_| {
146                CellosError::Host(format!(
147                    "firecracker API request to {path} timed out after {}s",
148                    REQUEST_TIMEOUT.as_secs()
149                ))
150            })??;
151
152        Ok(status)
153    }
154
155    async fn send_request(&self, req: Request<Full<Bytes>>) -> Result<StatusCode, CellosError> {
156        let stream = UnixStream::connect(&self.socket_path).await.map_err(|e| {
157            CellosError::Host(format!(
158                "connect to firecracker socket {}: {e}",
159                self.socket_path.display()
160            ))
161        })?;
162
163        let io = TokioIo::new(stream);
164        let (mut sender, conn) = http1::handshake::<_, Full<Bytes>>(io)
165            .await
166            .map_err(|e| CellosError::Host(format!("firecracker HTTP handshake: {e}")))?;
167
168        // Drive the connection in the background; errors are surfaced via `sender`.
169        tokio::spawn(async move {
170            if let Err(e) = conn.await {
171                tracing::debug!(error = %e, "firecracker connection task ended");
172            }
173        });
174
175        let resp: Response<Incoming> = sender
176            .send_request(req)
177            .await
178            .map_err(|e| CellosError::Host(format!("firecracker HTTP send: {e}")))?;
179
180        Ok(resp.status())
181    }
182}
183
184// ── Firecracker API request bodies ──────────────────────────────────────────
185
186/// `PUT /machine-config`
187#[derive(Debug, Serialize)]
188pub struct MachineConfig {
189    pub vcpu_count: u32,
190    pub mem_size_mib: u32,
191    /// Hypervisor-managed time: forward host clock corrections into the guest.
192    #[serde(default)]
193    pub track_dirty_pages: bool,
194}
195
196/// `PUT /boot-source`
197#[derive(Debug, Serialize)]
198pub struct BootSource {
199    pub kernel_image_path: String,
200    #[serde(skip_serializing_if = "Option::is_none")]
201    pub boot_args: Option<String>,
202}
203
204/// `PUT /drives/{drive_id}`
205#[derive(Debug, Serialize)]
206pub struct Drive {
207    pub drive_id: String,
208    pub path_on_host: String,
209    pub is_root_device: bool,
210    pub is_read_only: bool,
211}
212
213/// `PUT /vsock`
214///
215/// Configures the virtio-vsock device.  The guest communicates with the host
216/// using AF_VSOCK sockets; Firecracker maps guest-initiated connections to
217/// Unix domain sockets on the host at `<uds_path>_<port>`.
218#[derive(Debug, Serialize)]
219pub struct VsockDevice {
220    /// CID assigned to the guest (must be ≥ 3; 2 = VMADDR_CID_HOST is reserved).
221    pub guest_cid: u32,
222    /// Base path of the host-side Unix domain socket.
223    /// Guest-initiated connections to port P arrive on `<uds_path>_P`.
224    pub uds_path: String,
225}
226
227/// `PUT /network-interfaces/{iface_id}`
228///
229/// Attaches a virtio-net device to the VM backed by a host TAP interface.
230/// `host_dev_name` must already exist on the host (created via `ip tuntap add`)
231/// and be owned by the uid Firecracker runs under, otherwise the API call
232/// fails with EPERM.
233#[derive(Debug, Serialize)]
234pub struct NetworkInterface {
235    pub iface_id: String,
236    pub guest_mac: String,
237    pub host_dev_name: String,
238}
239
240/// `PUT /actions`
241#[derive(Debug, Serialize)]
242pub struct InstanceAction {
243    pub action_type: InstanceActionType,
244}
245
246#[derive(Debug, Serialize)]
247pub enum InstanceActionType {
248    #[serde(rename = "InstanceStart")]
249    InstanceStart,
250    #[serde(rename = "SendCtrlAltDel")]
251    SendCtrlAltDel,
252}
253
254/// `PATCH /vm`
255///
256/// L2-06 warm pool: between `InstanceStart` and `PUT /snapshot/create` we
257/// transition the VM to `Paused`. Firecracker refuses to snapshot a Running
258/// VM (vCPU memory writes would race with the snapshot mmap). After
259/// snapshotting, we kill the original VMM — the on-disk snapshot is the
260/// durable artifact.
261#[derive(Debug, Serialize)]
262pub struct VmStatePatch {
263    pub state: VmState,
264}
265
266#[derive(Debug, Serialize)]
267pub enum VmState {
268    Paused,
269    Resumed,
270}
271
272/// `PUT /snapshot/create`
273///
274/// L2-06-2 fill() path. `snapshot_type=Full` writes both the VM state file
275/// (registers, devices) at `snapshot_path` AND a sibling memory dump at
276/// `mem_file_path`. The two files together are what `PUT /snapshot/load`
277/// consumes at restore time. Sizes: state ~ a few KiB, mem ~ guest RAM.
278#[derive(Debug, Serialize)]
279pub struct SnapshotCreate {
280    pub snapshot_type: SnapshotType,
281    pub snapshot_path: String,
282    pub mem_file_path: String,
283}
284
285#[derive(Debug, Serialize)]
286pub enum SnapshotType {
287    Full,
288    Diff,
289}
290
291/// `PUT /snapshot/load`
292///
293/// L2-06-2 checkout() path. Restores a Firecracker VMM from the paired
294/// state + memory files written by [`SnapshotCreate`]. `resume_vm=true`
295/// removes the need for a separate `PATCH /vm Resumed` after the load —
296/// the VM is Running by the time this returns 204.
297///
298/// Firecracker accepts a deprecated `mem_file_path` field and a newer
299/// `mem_backend` struct; we use the modern form (`File` backend) so the
300/// supervisor doesn't depend on a deprecation that may land in a future
301/// VMM upgrade.
302#[derive(Debug, Serialize)]
303pub struct SnapshotLoad {
304    pub snapshot_path: String,
305    pub mem_backend: MemBackend,
306    #[serde(default)]
307    pub enable_diff_snapshots: bool,
308    #[serde(default)]
309    pub resume_vm: bool,
310}
311
312#[derive(Debug, Serialize)]
313pub struct MemBackend {
314    pub backend_type: MemBackendType,
315    pub backend_path: String,
316}
317
318#[derive(Debug, Serialize)]
319pub enum MemBackendType {
320    File,
321    Uffd,
322}
323
/// Wire-format tests for the request bodies.
///
/// Each test compares the *entire* serialized body against the expected JSON
/// document. Substring checks would not notice an unexpected extra key or a
/// key that silently disappeared, and Firecracker rejects bodies it does not
/// recognise.
#[cfg(test)]
mod tests {
    use serde_json::{json, Value};

    use super::*;

    /// Serializes a request body into a JSON tree for whole-document comparison.
    fn wire<T: serde::Serialize>(body: &T) -> Value {
        serde_json::to_value(body).expect("request bodies always serialize")
    }

    #[test]
    fn machine_config_serializes() {
        let cfg = MachineConfig {
            vcpu_count: 1,
            mem_size_mib: 128,
            track_dirty_pages: false,
        };
        assert_eq!(
            wire(&cfg),
            json!({"vcpu_count": 1, "mem_size_mib": 128, "track_dirty_pages": false})
        );
    }

    #[test]
    fn machine_config_can_enable_dirty_page_tracking() {
        let cfg = MachineConfig {
            vcpu_count: 2,
            mem_size_mib: 256,
            track_dirty_pages: true,
        };
        assert_eq!(
            wire(&cfg),
            json!({"vcpu_count": 2, "mem_size_mib": 256, "track_dirty_pages": true})
        );
    }

    #[test]
    fn instance_start_action_serializes() {
        let act = InstanceAction {
            action_type: InstanceActionType::InstanceStart,
        };
        assert_eq!(wire(&act), json!({"action_type": "InstanceStart"}));
    }

    #[test]
    fn send_ctrl_alt_del_serializes() {
        let act = InstanceAction {
            action_type: InstanceActionType::SendCtrlAltDel,
        };
        assert_eq!(wire(&act), json!({"action_type": "SendCtrlAltDel"}));
    }

    #[test]
    fn boot_source_omits_optional_boot_args() {
        let src = BootSource {
            kernel_image_path: "/vmlinux".into(),
            boot_args: None,
        };
        // The key must be absent, not `null`.
        assert_eq!(wire(&src), json!({"kernel_image_path": "/vmlinux"}));
    }

    #[test]
    fn boot_source_includes_boot_args_when_set() {
        let src = BootSource {
            kernel_image_path: "/vmlinux".into(),
            boot_args: Some("console=ttyS0 reboot=k panic=1".into()),
        };
        assert_eq!(
            wire(&src),
            json!({
                "kernel_image_path": "/vmlinux",
                "boot_args": "console=ttyS0 reboot=k panic=1"
            })
        );
    }

    #[test]
    fn drive_serializes() {
        let drive = Drive {
            drive_id: "rootfs".into(),
            path_on_host: "/images/rootfs.ext4".into(),
            is_root_device: true,
            is_read_only: false,
        };
        assert_eq!(
            wire(&drive),
            json!({
                "drive_id": "rootfs",
                "path_on_host": "/images/rootfs.ext4",
                "is_root_device": true,
                "is_read_only": false
            })
        );
    }

    #[test]
    fn vsock_device_serializes() {
        let dev = VsockDevice {
            guest_cid: 3,
            uds_path: "/tmp/cellos-vsock.socket".into(),
        };
        assert_eq!(
            wire(&dev),
            json!({"guest_cid": 3, "uds_path": "/tmp/cellos-vsock.socket"})
        );
    }

    #[test]
    fn network_interface_serializes() {
        let ni = NetworkInterface {
            iface_id: "eth0".into(),
            guest_mac: "AA:FC:00:00:00:01".into(),
            host_dev_name: "cfc-abcd1234".into(),
        };
        assert_eq!(
            wire(&ni),
            json!({
                "iface_id": "eth0",
                "guest_mac": "AA:FC:00:00:00:01",
                "host_dev_name": "cfc-abcd1234"
            })
        );
    }

    #[test]
    fn vm_state_patch_paused_serializes() {
        let p = VmStatePatch {
            state: VmState::Paused,
        };
        assert_eq!(wire(&p), json!({"state": "Paused"}));
    }

    #[test]
    fn vm_state_patch_resumed_serializes() {
        let p = VmStatePatch {
            state: VmState::Resumed,
        };
        assert_eq!(wire(&p), json!({"state": "Resumed"}));
    }

    #[test]
    fn snapshot_create_serializes_full_with_paths() {
        let s = SnapshotCreate {
            snapshot_type: SnapshotType::Full,
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_file_path: "/tmp/cellos-pool-0.mem".into(),
        };
        assert_eq!(
            wire(&s),
            json!({
                "snapshot_type": "Full",
                "snapshot_path": "/tmp/cellos-pool-0.snap",
                "mem_file_path": "/tmp/cellos-pool-0.mem"
            })
        );
    }

    #[test]
    fn snapshot_create_serializes_diff() {
        let s = SnapshotCreate {
            snapshot_type: SnapshotType::Diff,
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_file_path: "/tmp/cellos-pool-0.mem".into(),
        };
        assert_eq!(wire(&s)["snapshot_type"], json!("Diff"));
    }

    #[test]
    fn snapshot_load_serializes_with_file_backend_and_resume() {
        let s = SnapshotLoad {
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_backend: MemBackend {
                backend_type: MemBackendType::File,
                backend_path: "/tmp/cellos-pool-0.mem".into(),
            },
            enable_diff_snapshots: false,
            resume_vm: true,
        };
        assert_eq!(
            wire(&s),
            json!({
                "snapshot_path": "/tmp/cellos-pool-0.snap",
                "mem_backend": {
                    "backend_type": "File",
                    "backend_path": "/tmp/cellos-pool-0.mem"
                },
                "enable_diff_snapshots": false,
                "resume_vm": true
            })
        );
    }

    #[test]
    fn mem_backend_serializes_uffd() {
        let backend = MemBackend {
            backend_type: MemBackendType::Uffd,
            backend_path: "/tmp/cellos-uffd.sock".into(),
        };
        assert_eq!(
            wire(&backend),
            json!({"backend_type": "Uffd", "backend_path": "/tmp/cellos-uffd.sock"})
        );
    }
}