cellos-host-firecracker 0.5.0

Firecracker microVM backend for CellOS — jailer integration, warm pool with snapshot/restore, KVM nested-virtualisation aware.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
//! Minimal Firecracker Management API client (HTTP/1.1 over Unix domain socket).
//!
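//! Firecracker exposes a REST API on a Unix socket created at startup.
//! Configuration calls are synchronous `PUT` requests (plus `PATCH /vm` for
//! pause/resume) that return 204 No Content on success, or a JSON error body
//! on failure. This client surfaces only the status code; the response body
//! is not read.
//!
//! This client is intentionally minimal — it covers only the paths needed for
//! the L2-06 lifecycle: machine-config, boot-source, drives, vsock,
//! network-interfaces, actions, `PATCH /vm`, and snapshot create/load.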
//!
//! # Platform support
//!
//! The HTTP-over-Unix-socket client (`FirecrackerApiClient`) is Linux-only —
//! Firecracker itself only runs on Linux KVM, and the underlying transport
//! (`tokio::net::UnixStream`) is unavailable on Windows. The pure-data request
//! body types (`MachineConfig`, `BootSource`, …) are cross-platform so the
//! workspace can `cargo check` cleanly on every host.

use std::path::PathBuf;

use serde::Serialize;

#[cfg(target_os = "linux")]
use std::time::Duration;

#[cfg(target_os = "linux")]
use bytes::Bytes;
#[cfg(target_os = "linux")]
use http_body_util::Full;
#[cfg(target_os = "linux")]
use hyper::body::Incoming;
#[cfg(target_os = "linux")]
use hyper::client::conn::http1;
#[cfg(target_os = "linux")]
use hyper::{Method, Request, Response, StatusCode};
#[cfg(target_os = "linux")]
use hyper_util::rt::TokioIo;
#[cfg(target_os = "linux")]
use tokio::net::UnixStream;
#[cfg(target_os = "linux")]
use tokio::time::timeout;
#[cfg(target_os = "linux")]
use tracing::instrument;

#[cfg(target_os = "linux")]
use cellos_core::CellosError;

/// Upper bound on how long [`FirecrackerApiClient::wait_for_ready`] keeps
/// polling for the API socket while the Firecracker process is starting.
///
/// The deadline is only checked between probes, so the real wait can
/// overshoot by roughly one [`SOCKET_POLL_INTERVAL`].
#[cfg(target_os = "linux")]
const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
/// Budget for a single API call (configuration is fast on a local socket).
///
/// The timeout wraps the whole exchange — connecting to the socket, the
/// HTTP/1.1 handshake, and waiting for the response head — not just the send.
#[cfg(target_os = "linux")]
const REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
/// Sleep between readiness probes while waiting for Firecracker to create
/// its API socket.
#[cfg(target_os = "linux")]
const SOCKET_POLL_INTERVAL: Duration = Duration::from_millis(50);

/// HTTP client that speaks to one Firecracker VMM over its Unix API socket.
///
/// The client stores nothing but the socket path, so cloning it is just a
/// path copy.
///
/// Linux-only — Firecracker is a Linux/KVM-only VMM. The struct is still
/// declared on every platform (with no methods available off-Linux) so that
/// path-typing in the Linux body of `lib.rs` does not trigger downstream
/// rebuild thrash; non-Linux hosts cannot construct one because `new` is
/// gated.
#[derive(Clone, Debug)]
pub struct FirecrackerApiClient {
    /// Filesystem path of the VMM's API socket.
    // Only the Linux-gated `impl` reads this field, so the dead-code lint is
    // silenced on other targets only; on Linux it stays active and will flag
    // the field if it ever becomes genuinely unused.
    #[cfg_attr(not(target_os = "linux"), allow(dead_code))]
    socket_path: PathBuf,
}

#[cfg(target_os = "linux")]
impl FirecrackerApiClient {
    /// Create a client for the Firecracker API socket at `socket_path`.
    ///
    /// Performs no I/O — the socket does not have to exist yet. Use
    /// [`wait_for_ready`](Self::wait_for_ready) to block until the VMM is
    /// accepting connections.
    pub fn new(socket_path: impl Into<PathBuf>) -> Self {
        Self {
            socket_path: socket_path.into(),
        }
    }

    /// Wait up to [`CONNECT_TIMEOUT`] for the VMM to accept connections on
    /// its API socket.
    ///
    /// Readiness is probed by attempting to connect every
    /// [`SOCKET_POLL_INTERVAL`]; a successful connect proves the VMM is
    /// listening. No HTTP request is sent, and the probe connection is
    /// dropped immediately.
    ///
    /// # Errors
    ///
    /// Returns [`CellosError::Host`] if no connect attempt succeeds before
    /// the deadline. The message includes the last connect error, so a
    /// socket that never appeared can be told apart from one that exists
    /// but refuses or denies the connection.
    #[instrument(skip(self), fields(socket = %self.socket_path.display()))]
    pub async fn wait_for_ready(&self) -> Result<(), CellosError> {
        let deadline = tokio::time::Instant::now() + CONNECT_TIMEOUT;
        loop {
            // Connecting doubles as the existence check: a missing socket
            // file simply fails the connect. This avoids a separate
            // synchronous `stat` and the race between it and the connect.
            let connect_err = match UnixStream::connect(&self.socket_path).await {
                Ok(_probe) => {
                    tracing::debug!("firecracker socket ready");
                    return Ok(());
                }
                Err(e) => e,
            };
            if tokio::time::Instant::now() >= deadline {
                return Err(CellosError::Host(format!(
                    "timed out waiting for Firecracker socket at {}: {connect_err}",
                    self.socket_path.display()
                )));
            }
            tokio::time::sleep(SOCKET_POLL_INTERVAL).await;
        }
    }

    /// `PUT <path>` with a JSON body.  Returns the HTTP status code.
    ///
    /// Firecracker returns 204 No Content for successful configuration calls.
    /// Any status the VMM answers with — including 4xx/5xx — comes back as
    /// `Ok(status)`; callers must inspect it.
    ///
    /// # Errors
    ///
    /// Returns [`CellosError::Host`] if the body cannot be serialized, the
    /// request cannot be built or sent, or [`REQUEST_TIMEOUT`] elapses.
    #[instrument(skip(self, body), fields(socket = %self.socket_path.display(), path = path))]
    pub async fn put<T: Serialize>(&self, path: &str, body: &T) -> Result<StatusCode, CellosError> {
        self.send_json(Method::PUT, path, body).await
    }

    /// `PATCH <path>` with a JSON body.  Returns the HTTP status code.
    ///
    /// Used by the L2-06 warm-pool path to drive `PATCH /vm` with
    /// `{"state":"Paused"}` between InstanceStart and `PUT /snapshot/create`.
    /// Firecracker rejects `PUT /snapshot/create` unless the VM is Paused —
    /// snapshotting a Running VM would race with vCPU writes to RAM.
    ///
    /// As with [`put`](Self::put), a non-success status is returned as
    /// `Ok(status)`, not as an error.
    ///
    /// # Errors
    ///
    /// Returns [`CellosError::Host`] if the body cannot be serialized, the
    /// request cannot be built or sent, or [`REQUEST_TIMEOUT`] elapses.
    #[instrument(skip(self, body), fields(socket = %self.socket_path.display(), path = path))]
    pub async fn patch<T: Serialize>(
        &self,
        path: &str,
        body: &T,
    ) -> Result<StatusCode, CellosError> {
        self.send_json(Method::PATCH, path, body).await
    }

    /// Serialize `body` to JSON and send it as `<method> <path>`, bounded by
    /// [`REQUEST_TIMEOUT`].
    ///
    /// `path` is appended verbatim to `http://localhost`, so it must start
    /// with `/`.
    async fn send_json<T: Serialize>(
        &self,
        method: Method,
        path: &str,
        body: &T,
    ) -> Result<StatusCode, CellosError> {
        let body_bytes = serde_json::to_vec(body)
            .map_err(|e| CellosError::Host(format!("serialize firecracker request: {e}")))?;

        // The authority is a placeholder: the bytes travel over the Unix
        // socket, so `localhost` is never resolved.
        let req = Request::builder()
            .method(method)
            .uri(format!("http://localhost{path}"))
            .header("Content-Type", "application/json")
            .header("Accept", "application/json")
            .header("Host", "localhost")
            .body(Full::new(Bytes::from(body_bytes)))
            .map_err(|e| CellosError::Host(format!("build firecracker request: {e}")))?;

        // `timeout` adds an outer `Result` layer. Map its expiry into our
        // error type and peel that layer with `?`; what remains is
        // `send_request`'s own `Result`, which is returned as-is.
        timeout(REQUEST_TIMEOUT, self.send_request(req))
            .await
            .map_err(|_| {
                CellosError::Host(format!(
                    "firecracker API request to {path} timed out after {}s",
                    REQUEST_TIMEOUT.as_secs()
                ))
            })?
    }

    /// Open a fresh connection to the API socket, send `req`, and return the
    /// response status.
    ///
    /// Each call connects anew; nothing is pooled. The response body is not
    /// read — only the status line is surfaced to the caller.
    async fn send_request(&self, req: Request<Full<Bytes>>) -> Result<StatusCode, CellosError> {
        let stream = UnixStream::connect(&self.socket_path).await.map_err(|e| {
            CellosError::Host(format!(
                "connect to firecracker socket {}: {e}",
                self.socket_path.display()
            ))
        })?;

        let io = TokioIo::new(stream);
        let (mut sender, conn) = http1::handshake::<_, Full<Bytes>>(io)
            .await
            .map_err(|e| CellosError::Host(format!("firecracker HTTP handshake: {e}")))?;

        // Drive the connection in the background; errors are surfaced via `sender`.
        tokio::spawn(async move {
            if let Err(e) = conn.await {
                tracing::debug!(error = %e, "firecracker connection task ended");
            }
        });

        let resp: Response<Incoming> = sender
            .send_request(req)
            .await
            .map_err(|e| CellosError::Host(format!("firecracker HTTP send: {e}")))?;

        Ok(resp.status())
    }
}

// ── Firecracker API request bodies ──────────────────────────────────────────

/// `PUT /machine-config`
#[derive(Debug, Serialize)]
pub struct MachineConfig {
    pub vcpu_count: u32,
    pub mem_size_mib: u32,
    /// Hypervisor-managed time: forward host clock corrections into the guest.
    #[serde(default)]
    pub track_dirty_pages: bool,
}

/// `PUT /boot-source`
///
/// Tells Firecracker which kernel to boot and, optionally, with which
/// command line.
#[derive(Debug, Serialize)]
pub struct BootSource {
    /// Path of the guest kernel image.
    // NOTE(review): presumably resolved by the Firecracker process itself
    // (i.e. inside the jail when jailed) — confirm against the caller.
    pub kernel_image_path: String,
    /// Kernel command line. When `None` the key is left out of the JSON
    /// entirely rather than being sent as `null`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub boot_args: Option<String>,
}

/// `PUT /drives/{drive_id}`
///
/// Describes one block device to attach to the VM.
#[derive(Debug, Serialize)]
pub struct Drive {
    /// Identifier of the drive. The endpoint also carries it in the request
    /// path (`/drives/{drive_id}`); keeping the two in sync is up to the
    /// caller — nothing here enforces it.
    pub drive_id: String,
    /// Path of the backing file or block device on the host.
    pub path_on_host: String,
    /// Whether this drive is the guest's root device.
    pub is_root_device: bool,
    /// Whether the guest gets the drive read-only.
    pub is_read_only: bool,
}

/// `PUT /vsock`
///
/// Configures the virtio-vsock device.  The guest communicates with the host
/// using AF_VSOCK sockets; Firecracker maps guest-initiated connections to
/// Unix domain sockets on the host at `<uds_path>_<port>`.
#[derive(Debug, Serialize)]
pub struct VsockDevice {
    /// CID assigned to the guest (must be ≥ 3; 2 = VMADDR_CID_HOST is reserved).
    ///
    /// The `u32` type does not enforce the lower bound; no validation happens
    /// in this type.
    pub guest_cid: u32,
    /// Base path of the host-side Unix domain socket.
    /// Guest-initiated connections to port P arrive on `<uds_path>_P`.
    pub uds_path: String,
}

/// `PUT /network-interfaces/{iface_id}`
///
/// Attaches a virtio-net device to the VM backed by a host TAP interface.
/// `host_dev_name` must already exist on the host (created via `ip tuntap add`)
/// and be owned by the uid Firecracker runs under, otherwise the API call
/// fails with EPERM.
#[derive(Debug, Serialize)]
pub struct NetworkInterface {
    /// Identifier of the interface. The endpoint also carries it in the
    /// request path (`/network-interfaces/{iface_id}`).
    pub iface_id: String,
    /// MAC address presented to the guest, as a string (the unit test uses
    /// the colon-separated form `AA:FC:00:00:00:01`).
    pub guest_mac: String,
    /// Name of the pre-existing host TAP device backing this interface.
    pub host_dev_name: String,
}

/// `PUT /actions`
///
/// Request body that triggers a one-shot action on the VM; serializes as
/// `{"action_type": "<variant>"}`.
#[derive(Debug, Serialize)]
pub struct InstanceAction {
    /// Which action to perform.
    pub action_type: InstanceActionType,
}

/// Actions accepted in [`InstanceAction::action_type`].
///
/// Each variant's wire name is pinned with an explicit `rename` (identical to
/// the Rust identifier today), so renaming a variant in Rust cannot silently
/// change the JSON sent to Firecracker.
#[derive(Debug, Serialize)]
pub enum InstanceActionType {
    /// Start (boot) the configured microVM.
    #[serde(rename = "InstanceStart")]
    InstanceStart,
    /// Deliver a Ctrl+Alt+Del key sequence to the guest.
    // NOTE(review): presumably used as the graceful-shutdown request —
    // confirm against the callers in `lib.rs`.
    #[serde(rename = "SendCtrlAltDel")]
    SendCtrlAltDel,
}

/// `PATCH /vm`
///
/// L2-06 warm pool: between `InstanceStart` and `PUT /snapshot/create` we
/// transition the VM to `Paused`. Firecracker refuses to snapshot a Running
/// VM (vCPU memory writes would race with the snapshot mmap). After
/// snapshotting, we kill the original VMM — the on-disk snapshot is the
/// durable artifact.
///
/// Serializes as `{"state": "<variant>"}`.
#[derive(Debug, Serialize)]
pub struct VmStatePatch {
    /// Target state to move the VM into.
    pub state: VmState,
}

/// Target VM states for [`VmStatePatch`].
///
/// Serialized as the bare variant name (`"Paused"` / `"Resumed"`).
#[derive(Debug, Serialize)]
pub enum VmState {
    /// Pause the VM; required before `PUT /snapshot/create`.
    Paused,
    /// Resume a paused VM.
    Resumed,
}

/// `PUT /snapshot/create`
///
/// L2-06-2 fill() path. `snapshot_type=Full` writes both the VM state file
/// (registers, devices) at `snapshot_path` AND a sibling memory dump at
/// `mem_file_path`. The two files together are what `PUT /snapshot/load`
/// consumes at restore time. Sizes: state ~ a few KiB, mem ~ guest RAM.
#[derive(Debug, Serialize)]
pub struct SnapshotCreate {
    /// Kind of snapshot to take.
    pub snapshot_type: SnapshotType,
    /// Destination path of the VM state file.
    pub snapshot_path: String,
    /// Destination path of the guest memory dump.
    pub mem_file_path: String,
}

/// Snapshot flavours for [`SnapshotCreate::snapshot_type`].
///
/// Serialized as the bare variant name (`"Full"` / `"Diff"`).
#[derive(Debug, Serialize)]
pub enum SnapshotType {
    /// Complete snapshot: VM state plus the whole guest memory.
    Full,
    /// Incremental snapshot.
    // NOTE(review): presumably requires dirty-page tracking to be enabled
    // (`MachineConfig::track_dirty_pages`) — confirm against Firecracker's
    // snapshot documentation before relying on it.
    Diff,
}

/// `PUT /snapshot/load`
///
/// L2-06-2 checkout() path. Restores a Firecracker VMM from the paired
/// state + memory files written by [`SnapshotCreate`]. `resume_vm=true`
/// removes the need for a separate `PATCH /vm Resumed` after the load —
/// the VM is Running by the time this returns 204.
///
/// Firecracker accepts a deprecated `mem_file_path` field and a newer
/// `mem_backend` struct; we use the modern form (`File` backend) so the
/// supervisor doesn't depend on a deprecation that may land in a future
/// VMM upgrade.
///
/// Every field is always serialized — there is no "omit when default"
/// behaviour. (`#[serde(default)]` only affects `Deserialize`, which this
/// type does not derive.)
#[derive(Debug, Serialize)]
pub struct SnapshotLoad {
    /// Path of the VM state file written by `PUT /snapshot/create`.
    pub snapshot_path: String,
    /// Where the guest memory contents come from.
    pub mem_backend: MemBackend,
    /// Enable dirty-page tracking on the restored VM so that further `Diff`
    /// snapshots can be taken from it.
    pub enable_diff_snapshots: bool,
    /// When `true`, the VM is resumed as part of the load instead of being
    /// left paused.
    pub resume_vm: bool,
}

/// Guest-memory source for [`SnapshotLoad::mem_backend`].
#[derive(Debug, Serialize)]
pub struct MemBackend {
    /// How Firecracker obtains the guest memory.
    pub backend_type: MemBackendType,
    /// Path interpreted according to `backend_type`. For `File` it is the
    /// memory dump written by `PUT /snapshot/create`.
    pub backend_path: String,
}

/// Backend kinds for [`MemBackend::backend_type`].
///
/// Serialized as the bare variant name (`"File"` / `"Uffd"`).
#[derive(Debug, Serialize)]
pub enum MemBackendType {
    /// Guest memory is loaded from a memory file on disk.
    File,
    /// Guest memory is served on demand through userfaultfd.
    // NOTE(review): in this mode `backend_path` presumably names the Unix
    // socket of the page-fault handler process — confirm against the
    // Firecracker snapshot documentation.
    Uffd,
}

/// Wire-format tests for the request bodies.
///
/// Each test compares the complete serialized JSON value against the exact
/// document Firecracker should receive, so a missing, extra, renamed or
/// misplaced field fails the test (substring checks would let those through).
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Serialize a request body into a JSON value for comparison.
    fn wire(body: &impl serde::Serialize) -> serde_json::Value {
        serde_json::to_value(body).expect("request body must serialize")
    }

    #[test]
    fn machine_config_serializes() {
        let cfg = MachineConfig {
            vcpu_count: 1,
            mem_size_mib: 128,
            track_dirty_pages: false,
        };
        assert_eq!(
            wire(&cfg),
            json!({
                "vcpu_count": 1,
                "mem_size_mib": 128,
                "track_dirty_pages": false
            })
        );
    }

    #[test]
    fn instance_start_action_serializes() {
        let act = InstanceAction {
            action_type: InstanceActionType::InstanceStart,
        };
        assert_eq!(wire(&act), json!({ "action_type": "InstanceStart" }));
    }

    #[test]
    fn send_ctrl_alt_del_serializes() {
        let act = InstanceAction {
            action_type: InstanceActionType::SendCtrlAltDel,
        };
        assert_eq!(wire(&act), json!({ "action_type": "SendCtrlAltDel" }));
    }

    #[test]
    fn boot_source_omits_optional_boot_args() {
        let src = BootSource {
            kernel_image_path: "/vmlinux".into(),
            boot_args: None,
        };
        // Exact equality also proves that `boot_args` is absent, not `null`.
        assert_eq!(wire(&src), json!({ "kernel_image_path": "/vmlinux" }));
    }

    #[test]
    fn boot_source_includes_boot_args_when_set() {
        let src = BootSource {
            kernel_image_path: "/vmlinux".into(),
            boot_args: Some("console=ttyS0 reboot=k panic=1".into()),
        };
        assert_eq!(
            wire(&src),
            json!({
                "kernel_image_path": "/vmlinux",
                "boot_args": "console=ttyS0 reboot=k panic=1"
            })
        );
    }

    #[test]
    fn drive_serializes() {
        let drive = Drive {
            drive_id: "rootfs".into(),
            path_on_host: "/tmp/cellos-rootfs.ext4".into(),
            is_root_device: true,
            is_read_only: false,
        };
        assert_eq!(
            wire(&drive),
            json!({
                "drive_id": "rootfs",
                "path_on_host": "/tmp/cellos-rootfs.ext4",
                "is_root_device": true,
                "is_read_only": false
            })
        );
    }

    #[test]
    fn vsock_device_serializes() {
        let dev = VsockDevice {
            guest_cid: 3,
            uds_path: "/tmp/cellos-vsock.socket".into(),
        };
        assert_eq!(
            wire(&dev),
            json!({
                "guest_cid": 3,
                "uds_path": "/tmp/cellos-vsock.socket"
            })
        );
    }

    #[test]
    fn network_interface_serializes() {
        let ni = NetworkInterface {
            iface_id: "eth0".into(),
            guest_mac: "AA:FC:00:00:00:01".into(),
            host_dev_name: "cfc-abcd1234".into(),
        };
        assert_eq!(
            wire(&ni),
            json!({
                "iface_id": "eth0",
                "guest_mac": "AA:FC:00:00:00:01",
                "host_dev_name": "cfc-abcd1234"
            })
        );
    }

    #[test]
    fn vm_state_patch_paused_serializes() {
        let p = VmStatePatch {
            state: VmState::Paused,
        };
        assert_eq!(wire(&p), json!({ "state": "Paused" }));
    }

    #[test]
    fn vm_state_patch_resumed_serializes() {
        let p = VmStatePatch {
            state: VmState::Resumed,
        };
        assert_eq!(wire(&p), json!({ "state": "Resumed" }));
    }

    #[test]
    fn snapshot_create_serializes_full_with_paths() {
        let s = SnapshotCreate {
            snapshot_type: SnapshotType::Full,
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_file_path: "/tmp/cellos-pool-0.mem".into(),
        };
        assert_eq!(
            wire(&s),
            json!({
                "snapshot_type": "Full",
                "snapshot_path": "/tmp/cellos-pool-0.snap",
                "mem_file_path": "/tmp/cellos-pool-0.mem"
            })
        );
    }

    #[test]
    fn snapshot_create_serializes_diff_type() {
        let s = SnapshotCreate {
            snapshot_type: SnapshotType::Diff,
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_file_path: "/tmp/cellos-pool-0.mem".into(),
        };
        assert_eq!(wire(&s)["snapshot_type"], json!("Diff"));
    }

    #[test]
    fn snapshot_load_serializes_with_file_backend_and_resume() {
        let s = SnapshotLoad {
            snapshot_path: "/tmp/cellos-pool-0.snap".into(),
            mem_backend: MemBackend {
                backend_type: MemBackendType::File,
                backend_path: "/tmp/cellos-pool-0.mem".into(),
            },
            enable_diff_snapshots: false,
            resume_vm: true,
        };
        assert_eq!(
            wire(&s),
            json!({
                "snapshot_path": "/tmp/cellos-pool-0.snap",
                "mem_backend": {
                    "backend_type": "File",
                    "backend_path": "/tmp/cellos-pool-0.mem"
                },
                "enable_diff_snapshots": false,
                "resume_vm": true
            })
        );
    }

    #[test]
    fn mem_backend_serializes_uffd_type() {
        let backend = MemBackend {
            backend_type: MemBackendType::Uffd,
            backend_path: "/tmp/cellos-uffd.socket".into(),
        };
        assert_eq!(
            wire(&backend),
            json!({
                "backend_type": "Uffd",
                "backend_path": "/tmp/cellos-uffd.socket"
            })
        );
    }
}