cellos-host-firecracker 0.5.0

Firecracker microVM backend for CellOS — jailer integration, warm pool with snapshot/restore, KVM nested-virtualisation aware.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
//! Pre-booted Firecracker VM pool — L2-06-2.
//!
//! Cold-booting a Firecracker microVM costs ~125 ms wall-clock on warm hosts
//! (kernel decompression + init + cellos-init handshake). For agentic
//! workloads that spawn many short-lived cells, that overhead dominates the
//! workload's actual runtime. The remedy is the snapshot-restore path:
//! pre-boot a VM to a known state (kernel up, vsock + virtio devices wired,
//! cellos-init parked waiting for a `cellos.argv` cmdline), take a snapshot,
//! and restore from the snapshot at cell-create time. Restore is ~10 ms.
//!
//! This module implements the *pool state machine* and the *fill* API. The
//! integration into [`FirecrackerCellBackend::create`] is gated behind the
//! `CELLOS_FIRECRACKER_POOL_SIZE` environment variable — default `0` means
//! the pool is disabled and `create` follows the cold-boot path verbatim.
//! When `>0`, a future commit wires `checkout` into `create` ahead of
//! `configure_vm` and `checkin` into `destroy`.
//!
//! # Why a skeleton?
//!
//! The full snapshot path needs:
//!   * a Firecracker child managed by [`tokio::process::Child`] long enough to
//!     accept `PUT /snapshot/create` and then exit cleanly;
//!   * disk space accounting for memory snapshots (the `--mem-file-path` blob
//!     is the same size as the VM's RAM allocation);
//!   * a separate restore code path inside `create` that calls
//!     `PUT /snapshot/load` instead of `PUT /machine-config` + `PUT /boot-source`.
//!
//! All three of those land in subsequent L2-06 commits. This file pins the
//! contract — the state machine, the `checkout`/`checkin` API shape, and the
//! gating env var — so the wiring change in the live `create` path is a
//! mechanical follow-up rather than a redesign.
//!
//! # State machine
//!
//! Each slot transitions:
//!
//! ```text
//!   Empty ──fill()──▶ Available ──checkout()──▶ InUse ──checkin()──▶ Empty
//! ```
//!
//! `checkin` returns the slot to `Empty` (not `Available`) by design: a VM
//! that ran a cell is no longer at the parked-init snapshot state, so it
//! cannot be re-used without re-snapshotting from a fresh boot. A later
//! background filler re-populates the slot. This is the same lifecycle
//! AWS Lambda uses for warm-pool execution environments.

use std::path::PathBuf;

#[cfg(target_os = "linux")]
use std::time::Duration;

#[cfg(target_os = "linux")]
use crate::api_client::{
    BootSource, Drive, FirecrackerApiClient, InstanceAction, InstanceActionType, MachineConfig,
    MemBackend, MemBackendType, SnapshotCreate, SnapshotLoad, SnapshotType, VmState, VmStatePatch,
};
#[cfg(target_os = "linux")]
use cellos_core::CellosError;

/// Name of the environment variable that sizes (and thereby toggles) the
/// warm pool.
///
/// [`pool_size_from_env`] reads it: an unset, empty, or unparseable value
/// yields `0` (pool disabled); any positive integer enables the pool with
/// that many slots.
pub const POOL_SIZE_ENV: &str = "CELLOS_FIRECRACKER_POOL_SIZE";

/// State of one slot in the warm pool.
///
/// Transitions driven by [`FirecrackerPool`]:
/// `Empty` → (`fill`) → `Available` → (`checkout`) → `InUse` → (`checkin`)
/// → `Empty`.
///
/// Note that the snapshot paths live only in the `Available` variant: once a
/// slot is checked out it records nothing but the holding cell id.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PoolSlot {
    /// Slot has a snapshot on disk and is ready to be checked out.
    Available {
        /// Filesystem path to the Firecracker VM state file written by
        /// `PUT /snapshot/create`. `PUT /snapshot/load` consumes this as
        /// `snapshot_path` at restore time.
        snapshot_path: PathBuf,
        /// Filesystem path to the paired memory dump (`mem_file_path` in the
        /// `SnapshotCreate` body). Required at restore time via the
        /// `mem_backend.backend_path` field on `PUT /snapshot/load`.
        mem_file_path: PathBuf,
        /// Stable identifier for the pre-booted VM (used in logs / metrics).
        vm_id: String,
    },
    /// Slot has been handed to an active cell — not eligible for checkout.
    InUse {
        /// Cell id currently holding this slot. `checkin` compares against
        /// this value so a caller can only release the slot it checked out.
        cell_id: String,
    },
    /// Slot has no snapshot yet (initial state, or post-`checkin` awaiting
    /// a re-fill).
    Empty,
}

/// Pre-booted Firecracker VM pool for fast cell startup.
///
/// Each slot is a VM that has booted to the kernel's init stage and been
/// snapshotted — ready to restore in ~10 ms vs cold-boot ~125 ms.
///
/// **Thread-safety:** the pool is currently `&mut self`-driven for clarity.
/// The wiring inside [`FirecrackerCellBackend`] will wrap it in
/// `tokio::sync::Mutex<FirecrackerPool>` (same pattern as `running_vms`) so
/// concurrent `create` / `destroy` calls serialize on slot allocation.
///
/// [`FirecrackerCellBackend`]: crate::FirecrackerCellBackend
pub struct FirecrackerPool {
    // Slot count requested at construction; `new` builds `slots` with
    // exactly this many entries.
    size: usize,
    // Per-slot state, in slot-index order. `checkout` hands out the first
    // `Available` entry it finds.
    slots: Vec<PoolSlot>,
}

impl FirecrackerPool {
    /// Construct an empty pool with `size` slots, all in [`PoolSlot::Empty`].
    /// `size==0` is valid and yields a pool whose `checkout` always returns
    /// `None` — the wiring code uses this to short-circuit when the env var
    /// is unset or zero.
    pub fn new(size: usize) -> Self {
        Self {
            size,
            slots: (0..size).map(|_| PoolSlot::Empty).collect(),
        }
    }

    /// Number of slots configured for this pool (any state).
    pub fn size(&self) -> usize {
        self.size
    }

    /// Number of [`PoolSlot::Available`] slots — the number of cells that can
    /// be served by the fast-path right now.
    pub fn available(&self) -> usize {
        self.slots
            .iter()
            .filter(|s| matches!(s, PoolSlot::Available { .. }))
            .count()
    }

    /// Number of [`PoolSlot::InUse`] slots.
    pub fn in_use(&self) -> usize {
        self.slots
            .iter()
            .filter(|s| matches!(s, PoolSlot::InUse { .. }))
            .count()
    }

    /// Reserve an available snapshot for `cell_id`, transitioning the slot
    /// from `Available` to `InUse`. Returns the snapshot path on success, or
    /// `None` if no `Available` slot exists (caller falls back to cold-boot).
    ///
    /// Marked `async` for symmetry with the future implementation that will
    /// hold a `tokio::sync::Mutex`. The body is currently synchronous.
    pub async fn checkout(&mut self, cell_id: &str) -> Option<PathBuf> {
        for slot in self.slots.iter_mut() {
            if let PoolSlot::Available { snapshot_path, .. } = slot {
                let path = snapshot_path.clone();
                *slot = PoolSlot::InUse {
                    cell_id: cell_id.to_string(),
                };
                return Some(path);
            }
        }
        None
    }

    /// Release the slot previously checked out by `cell_id`, transitioning it
    /// to [`PoolSlot::Empty`]. A background filler is expected to re-populate
    /// the slot via [`Self::fill`]; this is intentional — a VM that ran a
    /// real cell is no longer at the parked-init state, so re-using its
    /// snapshot would leak workload-side state into the next cell.
    ///
    /// Returns `true` if a matching `InUse { cell_id }` slot was found and
    /// reset, `false` otherwise (call was a no-op).
    pub async fn checkin(&mut self, cell_id: &str) -> bool {
        for slot in self.slots.iter_mut() {
            if let PoolSlot::InUse { cell_id: held } = slot {
                if held == cell_id {
                    *slot = PoolSlot::Empty;
                    return true;
                }
            }
        }
        false
    }

    /// Boot one VM per `Empty` slot, snapshot it, and transition the slot to
    /// [`PoolSlot::Available`]. No-op for slots already in `Available` or
    /// `InUse`.
    ///
    /// On Linux (the only platform Firecracker runs on) this spawns one VMM
    /// per empty slot, drives the configure → InstanceStart → wait-for-init
    /// → PATCH-Paused → PUT-snapshot/create sequence, then kills the child
    /// process. The pair of `(snapshot_path, mem_file_path)` files left
    /// behind on disk is the durable artifact a future `checkout` will load.
    ///
    /// Off-Linux this is a no-op — Firecracker is not available, so the
    /// pool stays empty and `checkout` returns `None`, falling
    /// `FirecrackerCellBackend::create` through to its cold-boot path.
    ///
    /// Failures during fill are logged and the slot is left `Empty` (so a
    /// subsequent fill can retry); we don't propagate errors out of `fill`
    /// because the pool is a best-effort latency optimisation, not a
    /// correctness gate.
    #[cfg(target_os = "linux")]
    pub async fn fill(&mut self, firecracker_bin: &str, kernel: &str, rootfs: &str) {
        for (idx, slot) in self.slots.iter_mut().enumerate() {
            if !matches!(slot, PoolSlot::Empty) {
                continue;
            }
            match fill_one_slot(firecracker_bin, kernel, rootfs, idx).await {
                Ok((snapshot_path, mem_file_path, vm_id)) => {
                    tracing::info!(
                        slot = idx,
                        snapshot = %snapshot_path.display(),
                        mem = %mem_file_path.display(),
                        "warm pool slot filled"
                    );
                    *slot = PoolSlot::Available {
                        snapshot_path,
                        mem_file_path,
                        vm_id,
                    };
                }
                Err(e) => {
                    tracing::warn!(slot = idx, error = %e, "warm pool fill failed; slot stays Empty");
                }
            }
        }
    }

    /// Off-Linux stub — Firecracker only runs on Linux/KVM.
    #[cfg(not(target_os = "linux"))]
    pub async fn fill(&mut self, _firecracker_bin: &str, _kernel: &str, _rootfs: &str) {
        tracing::debug!(
            pool_size = self.size,
            "FirecrackerPool::fill no-op: target_os != linux"
        );
    }
}

/// Load a previously captured snapshot into a fresh Firecracker VMM by
/// issuing `PUT /snapshot/load` through `client`.
///
/// The caller owns the VMM process and its API socket; this helper only
/// sends the load request. The request asks Firecracker to resume the VM as
/// part of the load (`resume_vm: true`), uses a file-backed memory backend
/// pointing at `mem_file_path`, and leaves diff snapshots disabled.
///
/// Compiled on Linux only (`cfg(target_os = "linux")`).
///
/// # Errors
///
/// Propagates any error from the API client, and returns
/// `CellosError::Host` when Firecracker answers with a non-success status.
#[cfg(target_os = "linux")]
pub async fn restore_into(
    client: &FirecrackerApiClient,
    snapshot_path: &std::path::Path,
    mem_file_path: &std::path::Path,
) -> Result<(), CellosError> {
    let mem_backend = MemBackend {
        backend_type: MemBackendType::File,
        backend_path: mem_file_path.to_string_lossy().into_owned(),
    };
    let request = SnapshotLoad {
        snapshot_path: snapshot_path.to_string_lossy().into_owned(),
        mem_backend,
        enable_diff_snapshots: false,
        resume_vm: true,
    };

    let status = client.put("/snapshot/load", &request).await?;
    if status.is_success() {
        Ok(())
    } else {
        Err(CellosError::Host(format!(
            "firecracker /snapshot/load returned HTTP {status}"
        )))
    }
}

/// Boot one Firecracker VMM, snapshot it, kill the child, and return the
/// on-disk paths plus a stable vm-id. Linux-only.
///
/// Sequence: spawn the VMM → wait for its API socket → `PUT /machine-config`
/// → `PUT /boot-source` → `PUT /drives/rootfs` → `InstanceStart` → fixed
/// settle delay → `PATCH /vm` to `Paused` → `PUT /snapshot/create`.
///
/// Path discipline: snapshot files land at
/// `/tmp/cellos-pool-<vm_id>.snap` (state) and `/tmp/cellos-pool-<vm_id>.mem`
/// (memory dump). The VMM API socket lives at
/// `/tmp/cellos-pool-<vm_id>.socket` and is removed on teardown.
///
/// # Returns
///
/// `(snapshot_path, mem_file_path, vm_id)` on success.
///
/// # Errors
///
/// Returns `CellosError::Host` if the VMM cannot be spawned or any API call
/// answers with a non-success status, and propagates API client errors. On
/// every error path the child is killed and reaped, the socket is removed,
/// and any partially written snapshot / memory file is deleted so failed
/// fills do not accumulate RAM-sized blobs in `/tmp`.
#[cfg(target_os = "linux")]
async fn fill_one_slot(
    firecracker_bin: &str,
    kernel: &str,
    rootfs: &str,
    slot_idx: usize,
) -> Result<(PathBuf, PathBuf, String), CellosError> {
    use tokio::time::sleep;
    use uuid::Uuid;

    let vm_id = format!("pool-{}-{}", slot_idx, Uuid::new_v4().simple());
    let socket_path = PathBuf::from(format!("/tmp/cellos-pool-{vm_id}.socket"));
    let snapshot_path = PathBuf::from(format!("/tmp/cellos-pool-{vm_id}.snap"));
    let mem_file_path = PathBuf::from(format!("/tmp/cellos-pool-{vm_id}.mem"));

    // A leftover socket file at this path would make Firecracker fail to
    // `bind()`. Best-effort remove (a missing file is the normal case).
    let _ = std::fs::remove_file(&socket_path);

    // Spawn the VMM. Same direct-invocation shape as `build_direct_argv`
    // in lib.rs — no jailer because the warm pool's VM never runs workload
    // code; it boots cellos-init, gets snapshotted, and dies. The chroot
    // boundary is therefore not load-bearing for the fill path.
    // `kill_on_drop(true)` is the backstop if this future is dropped before
    // the explicit teardown below runs.
    let socket_str = socket_path.to_string_lossy().into_owned();
    let mut child = tokio::process::Command::new(firecracker_bin)
        .args(["--api-sock", socket_str.as_str(), "--level", "Error"])
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| CellosError::Host(format!("spawn firecracker for pool fill: {e}")))?;

    // Everything that can fail after the spawn is collected in one future
    // so a single teardown section below handles success and failure alike.
    let fill = async {
        let client = FirecrackerApiClient::new(&socket_path);
        client.wait_for_ready().await?;

        // Minimal machine config — pool VMs are stamped out from a single
        // snapshot, so we use a small static footprint. The supervisor's
        // hot path can still attach a larger scratch image at restore-time
        // via a subsequent `PUT /drives/...`.
        let mc = client
            .put(
                "/machine-config",
                &MachineConfig {
                    vcpu_count: 1,
                    mem_size_mib: 128,
                    track_dirty_pages: false,
                },
            )
            .await?;
        if !mc.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker /machine-config returned HTTP {mc}"
            )));
        }

        let bs = client
            .put(
                "/boot-source",
                &BootSource {
                    kernel_image_path: kernel.to_string(),
                    // `reboot=k panic=1` is the standard Firecracker pair —
                    // we never expect to reboot, but if the kernel panics
                    // during snapshot prep we want a clean exit rather than
                    // a hung VMM.
                    boot_args: Some("console=ttyS0 reboot=k panic=1 pci=off nomodules".to_string()),
                },
            )
            .await?;
        if !bs.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker /boot-source returned HTTP {bs}"
            )));
        }

        // The rootfs is attached read-only, so the shared image is never
        // modified by a pool VM.
        let drv = client
            .put(
                "/drives/rootfs",
                &Drive {
                    drive_id: "rootfs".into(),
                    path_on_host: rootfs.to_string(),
                    is_root_device: true,
                    is_read_only: true,
                },
            )
            .await?;
        if !drv.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker /drives/rootfs returned HTTP {drv}"
            )));
        }

        let start = client
            .put(
                "/actions",
                &InstanceAction {
                    action_type: InstanceActionType::InstanceStart,
                },
            )
            .await?;
        if !start.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker InstanceStart returned HTTP {start}"
            )));
        }

        // Wait for cellos-init to reach the parked state. The robust
        // signal is a vsock readiness ping (see the lib.rs `boot_result`
        // block's exit-code listener), but for the warm-pool path we don't
        // yet require an init-side vsock dialog — the kernel-mode handoff
        // to userspace is what we want to capture in the snapshot, not the
        // full init handshake. The fixed 500 ms wait is a heuristic that
        // gives the guest wall time to reach the parked userspace before we
        // pause; it is not a readiness guarantee.
        sleep(Duration::from_millis(500)).await;

        // Pause the VM before snapshotting — Firecracker refuses to
        // snapshot a Running VM.
        let pause = client
            .patch(
                "/vm",
                &VmStatePatch {
                    state: VmState::Paused,
                },
            )
            .await?;
        if !pause.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker PATCH /vm Paused returned HTTP {pause}"
            )));
        }

        let snap = client
            .put(
                "/snapshot/create",
                &SnapshotCreate {
                    snapshot_type: SnapshotType::Full,
                    snapshot_path: snapshot_path.to_string_lossy().into_owned(),
                    mem_file_path: mem_file_path.to_string_lossy().into_owned(),
                },
            )
            .await?;
        if !snap.is_success() {
            return Err(CellosError::Host(format!(
                "firecracker /snapshot/create returned HTTP {snap}"
            )));
        }

        Ok::<(), CellosError>(())
    };

    let result = fill.await;

    // Tear down the source VMM. The snapshot is the durable artifact; the
    // original Running-then-Paused process is no longer needed. `kill()`
    // sends SIGKILL; we then `wait()` so we don't leave a zombie.
    let _ = child.kill().await;
    let _ = child.wait().await;
    let _ = std::fs::remove_file(&socket_path);

    match result {
        Ok(()) => Ok((snapshot_path, mem_file_path, vm_id)),
        Err(e) => {
            // The paths are about to be dropped with the error, so nothing
            // else could ever clean these files up. Firecracker may have
            // written part (or all) of either file before the failure;
            // remove them now that the VMM is dead. Best-effort: a missing
            // file is the common case.
            let _ = std::fs::remove_file(&snapshot_path);
            let _ = std::fs::remove_file(&mem_file_path);
            Err(e)
        }
    }
}

/// Look up [`POOL_SIZE_ENV`] in the process environment and interpret it as
/// a slot count.
///
/// Surrounding whitespace is ignored. Anything that is not a valid `usize`
/// — including an unset or empty variable — is treated as `0`, which keeps
/// the pool disabled (the fail-closed default). Any non-zero result enables
/// the pool.
pub fn pool_size_from_env() -> usize {
    match std::env::var(POOL_SIZE_ENV) {
        Ok(raw) => raw.trim().parse::<usize>().unwrap_or(0),
        Err(_) => 0,
    }
}

// ── Tests ────────────────────────────────────────────────────────────────────

// Unit tests for the pool state machine. Slots are placed by hand where a
// snapshot is needed, so no test spawns a real Firecracker process.
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh pool of size N has N `Empty` slots, zero `Available`, zero
    /// `InUse`. `checkout` returns `None` because nothing has been filled.
    #[tokio::test]
    async fn new_pool_starts_empty() {
        let mut pool = FirecrackerPool::new(3);
        assert_eq!(pool.size(), 3);
        assert_eq!(pool.available(), 0);
        assert_eq!(pool.in_use(), 0);
        // checkout on an empty pool must yield None — the gate that lets
        // create() fall through to cold-boot.
        assert!(pool.checkout("cell-1").await.is_none());
    }

    /// `size=0` is the disabled-pool sentinel: `checkout` always yields
    /// `None`, `checkin` always yields `false`, no panics.
    #[tokio::test]
    async fn zero_size_pool_is_inert() {
        let mut pool = FirecrackerPool::new(0);
        assert_eq!(pool.size(), 0);
        assert!(pool.checkout("any-cell").await.is_none());
        assert!(!pool.checkin("any-cell").await);
    }

    /// State machine: an `Available` slot can be checked out (→ `InUse`),
    /// then checked in (→ `Empty`). Two cells trying to checkout from a
    /// one-slot pool: first wins, second gets `None`.
    #[tokio::test]
    async fn checkout_then_checkin_cycles_slot_through_states() {
        let mut pool = FirecrackerPool::new(1);
        // Hand-place an Available slot so we can exercise checkout without
        // having fill() spawn a real Firecracker process.
        pool.slots[0] = PoolSlot::Available {
            snapshot_path: PathBuf::from("/tmp/snap-1"),
            mem_file_path: PathBuf::from("/tmp/snap-1.mem"),
            vm_id: "vm-1".to_string(),
        };
        assert_eq!(pool.available(), 1);

        let path = pool.checkout("cell-1").await;
        assert_eq!(path, Some(PathBuf::from("/tmp/snap-1")));
        assert_eq!(pool.available(), 0);
        assert_eq!(pool.in_use(), 1);

        // Second checkout finds no Available slot (the only one is InUse)
        // and returns None — the cold-boot fallback signal.
        assert!(pool.checkout("cell-2").await.is_none());

        // Checkin by the holding cell_id transitions the slot to Empty.
        assert!(pool.checkin("cell-1").await);
        assert_eq!(pool.available(), 0);
        assert_eq!(pool.in_use(), 0);

        // Re-checkin is a no-op (returns false).
        assert!(!pool.checkin("cell-1").await);
    }

    /// `checkin` with a non-matching `cell_id` is a no-op. This protects
    /// against a stale destroy from another cell accidentally releasing
    /// someone else's slot.
    #[tokio::test]
    async fn checkin_wrong_cell_id_is_noop() {
        let mut pool = FirecrackerPool::new(1);
        pool.slots[0] = PoolSlot::InUse {
            cell_id: "real-cell".to_string(),
        };
        assert!(!pool.checkin("imposter-cell").await);
        // Slot still InUse with the real cell.
        assert_eq!(pool.in_use(), 1);
        // The real cell can still check in.
        assert!(pool.checkin("real-cell").await);
        assert_eq!(pool.in_use(), 0);
    }

    /// `fill` against a non-existent firecracker binary path is a soft
    /// failure: the spawn fails, the slot stays `Empty`, and the call does
    /// not propagate an error (the pool is best-effort latency optimisation,
    /// not a correctness gate). Off-Linux `fill` is a documented no-op so
    /// the assertion is the same on every platform.
    #[tokio::test]
    async fn fill_with_missing_binary_leaves_slots_empty() {
        let mut pool = FirecrackerPool::new(2);
        pool.fill(
            "/nonexistent/firecracker",
            "/nonexistent/vmlinux",
            "/nonexistent/rootfs.ext4",
        )
        .await;
        // Either Linux-spawn-failure or off-Linux-noop leaves the slots Empty.
        assert_eq!(pool.available(), 0);
        assert_eq!(pool.in_use(), 0);
        assert_eq!(
            pool.slots
                .iter()
                .filter(|s| matches!(s, PoolSlot::Empty))
                .count(),
            2
        );
    }

    /// State-machine cycle test: hand-place two `Available` slots (one per
    /// snapshot pair on disk would be the production path; here we skip the
    /// firecracker spawn and pin the transition matrix directly). Drive
    /// `checkout` twice and confirm both succeed, the third returns `None`,
    /// then `checkin` cycles both back to `Empty` exactly once each.
    #[tokio::test]
    async fn checkout_checkin_cycle_two_slots() {
        let mut pool = FirecrackerPool::new(2);
        pool.slots[0] = PoolSlot::Available {
            snapshot_path: PathBuf::from("/tmp/snap-a"),
            mem_file_path: PathBuf::from("/tmp/snap-a.mem"),
            vm_id: "vm-a".into(),
        };
        pool.slots[1] = PoolSlot::Available {
            snapshot_path: PathBuf::from("/tmp/snap-b"),
            mem_file_path: PathBuf::from("/tmp/snap-b.mem"),
            vm_id: "vm-b".into(),
        };
        assert_eq!(pool.available(), 2);

        let p1 = pool.checkout("cell-1").await.expect("first checkout");
        let p2 = pool.checkout("cell-2").await.expect("second checkout");
        assert_ne!(p1, p2, "each cell got a distinct snapshot path");
        assert_eq!(pool.available(), 0);
        assert_eq!(pool.in_use(), 2);

        // Third checkout from a fully in-use pool is the cold-boot signal.
        assert!(pool.checkout("cell-3").await.is_none());

        assert!(pool.checkin("cell-1").await);
        assert!(pool.checkin("cell-2").await);
        assert_eq!(pool.in_use(), 0);
        // checkin transitions to Empty (not Available) — the next fill()
        // re-populates from a fresh boot, because a VM that ran a workload
        // is no longer at the parked-init state.
        assert_eq!(
            pool.slots
                .iter()
                .filter(|s| matches!(s, PoolSlot::Empty))
                .count(),
            2
        );

        // Repeated checkin is a no-op (no slot in InUse matches).
        assert!(!pool.checkin("cell-1").await);
        assert!(!pool.checkin("cell-2").await);
    }

    /// `checkout` hands back the slot's snapshot path unchanged. Only the
    /// snapshot path is returned; the memory-file path stored in the
    /// `Available` slot is not part of `checkout`'s return value, so a
    /// caller of `restore_into` has to obtain it separately (the fill path
    /// names both files after the same `<vm_id>`). This pins the
    /// "snapshot path round-trips unchanged" contract that the
    /// `FirecrackerCellBackend::create` wiring relies on.
    #[tokio::test]
    async fn checkout_returns_snapshot_path_verbatim() {
        let mut pool = FirecrackerPool::new(1);
        pool.slots[0] = PoolSlot::Available {
            snapshot_path: PathBuf::from("/tmp/cellos-pool-X.snap"),
            mem_file_path: PathBuf::from("/tmp/cellos-pool-X.mem"),
            vm_id: "X".into(),
        };
        let got = pool.checkout("cell-X").await;
        assert_eq!(got, Some(PathBuf::from("/tmp/cellos-pool-X.snap")));
        // After checkout the slot is InUse{cell-X}.
        match &pool.slots[0] {
            PoolSlot::InUse { cell_id } => assert_eq!(cell_id, "cell-X"),
            other => panic!("expected InUse after checkout, got {other:?}"),
        }
    }

    /// `pool_size_from_env` returns 0 when the env var is unset. We can't
    /// reliably test the *set* path here (env mutation is racy across tests
    /// in the same process), but pinning the unset default is the gate that
    /// matters: if the env reader regressed to a non-zero default the warm
    /// pool would activate accidentally and changes in `create()` would take
    /// a different code path than expected.
    #[test]
    fn pool_size_from_env_defaults_to_zero_when_unset() {
        // Best-effort: only assert when the var is genuinely unset in this
        // test process. If the variable is set in this process's
        // environment, skip — we'd rather skip than be flaky.
        if std::env::var(POOL_SIZE_ENV).is_err() {
            assert_eq!(pool_size_from_env(), 0);
        }
    }
}