Skip to main content

squib_api/
controller.rs

1//! `RuntimeApiController` — channel to VMM + lock-free read mirror + timeout taxonomy.
2//!
3//! Per [20-firecracker-api.md §
4//! 5](../../../specs/20-firecracker-api.md#5-channel-to-vmm-and-the-read-only-fast-path):
5//!
6//! - `ArcSwap<ControllerSnapshot>` lock-free read mirror; written by the VMM event loop on every
7//!   state transition, read by every `GET` handler.
8//! - `tokio::sync::mpsc::Sender<(ApiAction, oneshot::Sender<ApiResponse>)>` single-writer channel
9//!   into the VMM event loop. Bounded (capacity 1024 per CLAUDE.md § Async).
10//! - Per-action-class `tokio::time::timeout` (D26); on timeout we surface 504 and log the
11//!   still-pending action at `error`.
12//!
13//! Pre-boot vs post-boot admissibility is checked synchronously against the
14//! `LifecyclePhase` carried in `ControllerSnapshot` — no VMM round-trip needed for
15//! rejection.
16
17#![deny(
18    clippy::unwrap_used,
19    clippy::expect_used,
20    clippy::indexing_slicing,
21    clippy::panic
22)]
23
24use std::{sync::Arc, time::Duration};
25
26use arc_swap::ArcSwap;
27use parking_lot::Mutex;
28use squib_core::LifecyclePhase;
29use tokio::sync::{mpsc, oneshot};
30use tracing::error;
31
32use crate::{
33    action::{ActionClass, ApiAction, ApiResponse},
34    error::ApiError,
35    schemas::{InstanceAction, InstanceInfo, MAX_DRIVES, MAX_NICS, MAX_PMEM, VmState},
36};
37
/// Table of time budgets, one per action class, applied with `tokio::time::timeout`.
///
/// The defaults quoted on each field are the values produced by `from_spec`; see
/// [70-security.md § 6](../../../specs/70-security.md#6-resource-limits).
#[derive(Debug, Clone, Copy)]
pub struct TimeoutTable {
    /// Budget for pre-boot configuration mutations (default: 5 s).
    pub pre_boot_config: Duration,
    /// Budget for `Action(InstanceStart)` (default: 30 s).
    pub instance_start: Duration,
    /// Budget for `PUT /snapshot/create` (default: 5 min).
    pub snapshot_create: Duration,
    /// Budget for `PUT /snapshot/load` (default: 5 min).
    pub snapshot_load: Duration,
    /// Budget for `PATCH /vm` (default: 5 s).
    pub vm_state_change: Duration,
    /// Budget for a `PATCH /balloon` resize (default: 30 s).
    pub balloon_resize: Duration,
    /// Budget for everything else, e.g. `FlushMetrics` (default: 5 s).
    pub other: Duration,
}
57
58impl TimeoutTable {
59    /// Defaults from the spec.
60    #[must_use]
61    pub const fn from_spec() -> Self {
62        Self {
63            pre_boot_config: Duration::from_secs(5),
64            instance_start: Duration::from_secs(30),
65            snapshot_create: Duration::from_mins(5),
66            snapshot_load: Duration::from_mins(5),
67            vm_state_change: Duration::from_secs(5),
68            balloon_resize: Duration::from_secs(30),
69            other: Duration::from_secs(5),
70        }
71    }
72
73    /// Look up the budget for a given action class.
74    #[must_use]
75    pub const fn for_class(&self, class: ActionClass) -> Duration {
76        match class {
77            ActionClass::PreBootConfig => self.pre_boot_config,
78            ActionClass::InstanceStart => self.instance_start,
79            ActionClass::SnapshotCreate => self.snapshot_create,
80            ActionClass::SnapshotLoad => self.snapshot_load,
81            ActionClass::VmStateChange => self.vm_state_change,
82            ActionClass::BalloonResize => self.balloon_resize,
83            ActionClass::Other => self.other,
84        }
85    }
86}
87
88impl Default for TimeoutTable {
89    fn default() -> Self {
90        Self::from_spec()
91    }
92}
93
94/// Lock-free read mirror surfaced via every `GET` handler.
95#[derive(Debug, Clone)]
96pub struct ControllerSnapshot {
97    /// Body of `GET /` (already collapsed to the upstream three-value vocabulary).
98    pub instance_info: InstanceInfo,
99    /// `firecracker_version` returned by `GET /version`.
100    pub firecracker_version: String,
101    /// Materialised `VmmConfig` for `GET /vm/config` — opaque JSON tree at this layer
102    /// (the VMM populates it from validated typed configs).
103    pub vm_config: Arc<serde_json::Value>,
104    /// Internal lifecycle phase. Never serialized to the wire.
105    pub phase: LifecyclePhase,
106}
107
108impl ControllerSnapshot {
109    /// Build a snapshot for a freshly-launched VMM (no boot yet).
110    pub fn new(
111        instance_id: impl Into<String>,
112        firecracker_version: impl Into<String>,
113        vmm_version: impl Into<String>,
114    ) -> Self {
115        let firecracker_version = firecracker_version.into();
116        Self {
117            instance_info: InstanceInfo {
118                id: instance_id.into(),
119                state: VmState::NotStarted,
120                vmm_version: vmm_version.into(),
121                app_name: "Firecracker".into(),
122            },
123            firecracker_version,
124            vm_config: Arc::new(serde_json::json!({})),
125            phase: LifecyclePhase::Uninitialized,
126        }
127    }
128}
129
130/// Channel sender type used by mutating handlers.
131pub type ActionSender = mpsc::Sender<(ApiAction, oneshot::Sender<ApiResponse>)>;
132
133/// Channel receiver type owned by the VMM event loop.
134pub type ActionReceiver = mpsc::Receiver<(ApiAction, oneshot::Sender<ApiResponse>)>;
135
/// Cross-field limits that the controller enforces synchronously, *before* an action is
/// forwarded to the VMM event loop. These are the upper bounds that the per-field
/// `Raw* → Validated TryFrom` conversions cannot enforce because they need the running
/// machine state (host RAM cap, configured `mem_size_mib`, running counts).
///
/// Per `93-improvements-review.md` Phase 2 entry — fix shape "thread `BackendCapabilities`
/// into the controller, add `MachineConfig::validate_against_host(...)` invoked at
/// dispatch time before the action reaches the VMM event loop".
#[derive(Debug)]
pub struct LimitsState {
    /// Host RAM cap (MiB). A `PUT /machine-config` asking for more than this is
    /// rejected before it reaches the VMM.
    pub host_ram_mib: u64,
    /// Most recently configured `mem_size_mib`. `None` until the first
    /// `PUT /machine-config` lands.
    pub mem_size_mib: Option<u64>,
    /// Drives the controller has accepted (≤ [`MAX_DRIVES`] = 8).
    pub running_drives: u32,
    /// Network interfaces accepted (≤ [`MAX_NICS`] = 8).
    pub running_nics: u32,
    /// pmem devices accepted (≤ [`MAX_PMEM`] = 4).
    pub running_pmem: u32,
}

impl LimitsState {
    /// Host RAM cap (MiB) used by [`Default`]: 1 TiB.
    ///
    /// Deliberately far above any realistic host so that unit tests keep passing
    /// without injecting a `BackendCapabilities` mock.
    pub const DEFAULT_HOST_RAM_MIB: u64 = 1024 * 1024;

    /// Builds a fresh limits state with an explicit host RAM cap.
    ///
    /// No `mem_size_mib` is recorded yet and all device counters start at zero.
    /// Production callers are expected to pass the value taken from the live HVF
    /// `BackendCapabilities` snapshot.
    #[must_use]
    pub const fn from_host_ram_mib(host_ram_mib: u64) -> Self {
        Self {
            host_ram_mib,
            mem_size_mib: None,
            running_drives: 0,
            running_nics: 0,
            running_pmem: 0,
        }
    }
}

impl Default for LimitsState {
    /// Fresh state capped at [`LimitsState::DEFAULT_HOST_RAM_MIB`]. The controller
    /// is meant to surface the real number from `BackendCapabilities` once the HVF
    /// backend is plumbed.
    fn default() -> Self {
        Self::from_host_ram_mib(Self::DEFAULT_HOST_RAM_MIB)
    }
}
184
185/// Controller surfaced to handlers. Written by the VMM event loop, read by every
186/// handler.
187#[derive(Debug)]
188pub struct RuntimeApiController {
189    snapshot: ArcSwap<ControllerSnapshot>,
190    vmm_tx: ActionSender,
191    timeouts: TimeoutTable,
192    limits: Mutex<LimitsState>,
193}
194
195impl RuntimeApiController {
196    /// Build a controller paired with a VMM event loop receiver. The receiver must be
197    /// drained by the VMM (or a stub for tests / Phase 2 wiring).
198    ///
199    /// `capacity` is the bounded mpsc capacity; CLAUDE.md § Async recommends 1024.
200    #[must_use]
201    pub fn new(
202        snapshot: ControllerSnapshot,
203        timeouts: TimeoutTable,
204        capacity: usize,
205    ) -> (Self, ActionReceiver) {
206        Self::new_with_limits(snapshot, timeouts, capacity, LimitsState::default())
207    }
208
209    /// Build a controller with explicit cross-field limits — the production caller
210    /// derives `host_ram_mib` from the live HVF `BackendCapabilities`; tests use the
211    /// default 1 TiB cap.
212    #[must_use]
213    pub fn new_with_limits(
214        snapshot: ControllerSnapshot,
215        timeouts: TimeoutTable,
216        capacity: usize,
217        limits: LimitsState,
218    ) -> (Self, ActionReceiver) {
219        let (tx, rx) = mpsc::channel(capacity);
220        let controller = Self {
221            snapshot: ArcSwap::from(Arc::new(snapshot)),
222            vmm_tx: tx,
223            timeouts,
224            limits: Mutex::new(limits),
225        };
226        (controller, rx)
227    }
228
229    /// Expose the current limits snapshot — tests use this to verify counter
230    /// progression after PUTs land.
231    #[must_use]
232    pub fn limits_snapshot(&self) -> LimitsSnapshot {
233        let g = self.limits.lock();
234        LimitsSnapshot {
235            host_ram_mib: g.host_ram_mib,
236            mem_size_mib: g.mem_size_mib,
237            running_drives: g.running_drives,
238            running_nics: g.running_nics,
239            running_pmem: g.running_pmem,
240        }
241    }
242
243    /// Cross-field admissibility check against the running machine state.
244    /// Runs *after* `validate_phase` and *before* the VMM channel is touched.
245    fn validate_cross_field(&self, action: &ApiAction) -> Result<(), ApiError> {
246        let g = self.limits.lock();
247        match action {
248            ApiAction::PutMachineConfig(cfg) => {
249                let req = cfg.mem_size_mib.get();
250                if req > g.host_ram_mib {
251                    return Err(ApiError::BadRequest(format!(
252                        "mem_size_mib={req} exceeds host RAM cap of {host} MiB",
253                        host = g.host_ram_mib,
254                    )));
255                }
256            }
257            ApiAction::PutBalloon(b) => {
258                cross_check_balloon(b.amount_mib, g.mem_size_mib)?;
259            }
260            ApiAction::PatchBalloon(u) => {
261                cross_check_balloon(u.amount_mib, g.mem_size_mib)?;
262            }
263            ApiAction::PutDrive(_) if u64::from(g.running_drives) >= MAX_DRIVES_AS_U64 => {
264                return Err(ApiError::BadRequest(format!(
265                    "drives: per-class cap {MAX_DRIVES} exceeded"
266                )));
267            }
268            ApiAction::PutNetwork(_) if u64::from(g.running_nics) >= MAX_NICS_AS_U64 => {
269                return Err(ApiError::BadRequest(format!(
270                    "network_interfaces: per-class cap {MAX_NICS} exceeded"
271                )));
272            }
273            ApiAction::PutPmem(_) if u64::from(g.running_pmem) >= MAX_PMEM_AS_U64 => {
274                return Err(ApiError::BadRequest(format!(
275                    "pmem: per-class cap {MAX_PMEM} exceeded"
276                )));
277            }
278            _ => {}
279        }
280        Ok(())
281    }
282}
283
/// The limits mutation an action commits once the VMM has accepted it.
///
/// `dispatch` has to move the owned `ApiAction` into the channel, so it records
/// this small `Copy` summary beforehand and applies it after a non-fault response
/// comes back.
#[derive(Debug, Clone, Copy)]
enum ActionCounterKick {
    /// The action does not touch the limits.
    None,
    /// Record the given value as the configured `mem_size_mib`.
    SetMemSize(u64),
    /// One more drive was accepted.
    AddDrive,
    /// One more network interface was accepted.
    AddNic,
    /// One more pmem device was accepted.
    AddPmem,
}
296
297impl ActionCounterKick {
298    fn for_action(action: &ApiAction) -> Self {
299        match action {
300            ApiAction::PutMachineConfig(cfg) => Self::SetMemSize(cfg.mem_size_mib.get()),
301            ApiAction::PutDrive(_) => Self::AddDrive,
302            ApiAction::PutNetwork(_) => Self::AddNic,
303            ApiAction::PutPmem(_) => Self::AddPmem,
304            _ => Self::None,
305        }
306    }
307
308    fn apply(self, ctl: &RuntimeApiController) {
309        if matches!(self, Self::None) {
310            return;
311        }
312        let mut g = ctl.limits.lock();
313        match self {
314            Self::SetMemSize(v) => g.mem_size_mib = Some(v),
315            Self::AddDrive => g.running_drives = g.running_drives.saturating_add(1),
316            Self::AddNic => g.running_nics = g.running_nics.saturating_add(1),
317            Self::AddPmem => g.running_pmem = g.running_pmem.saturating_add(1),
318            Self::None => {}
319        }
320    }
321}
322
/// Point-in-time copy of [`LimitsState`], handed to callers that only want to look.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LimitsSnapshot {
    /// Host RAM cap, in MiB.
    pub host_ram_mib: u64,
    /// Configured `mem_size_mib`, or `None` if no PUT has set it yet.
    pub mem_size_mib: Option<u64>,
    /// Number of drives accepted so far.
    pub running_drives: u32,
    /// Number of network interfaces accepted so far.
    pub running_nics: u32,
    /// Number of pmem devices accepted so far.
    pub running_pmem: u32,
}
337
338// `MAX_*` constants in `schemas::common` are `usize`; widening to `u64` once and
339// comparing against widened counters avoids the cast_possible_truncation lint without
340// hand-rolling a const-checked path.
341const MAX_DRIVES_AS_U64: u64 = MAX_DRIVES as u64;
342const MAX_NICS_AS_U64: u64 = MAX_NICS as u64;
343const MAX_PMEM_AS_U64: u64 = MAX_PMEM as u64;
344
345fn cross_check_balloon(amount_mib: u64, mem_size_mib: Option<u64>) -> Result<(), ApiError> {
346    // If `mem_size_mib` hasn't been configured yet (no PUT /machine-config), defer the
347    // bound check — upstream Firecracker accepts the balloon config at this point and
348    // re-validates it at `InstanceStart`. Squib mirrors that behaviour: only enforce
349    // the cap when both halves of the cross-field pair are present.
350    let Some(mem) = mem_size_mib else {
351        return Ok(());
352    };
353    // Upstream Firecracker `MAX_BALLOON_SIZE_MIB` is `mem_size_mib − 32`; squib mirrors
354    // it here. `mem - 32` saturates to zero on tiny VMs to avoid underflow surfacing as
355    // a confusing "exceeds u64::MAX − 32" message.
356    let max_balloon = mem.saturating_sub(32);
357    if amount_mib > max_balloon {
358        return Err(ApiError::BadRequest(format!(
359            "balloon amount_mib={amount_mib} exceeds max ({max_balloon} = mem_size_mib {mem} - 32)"
360        )));
361    }
362    Ok(())
363}
364
365impl RuntimeApiController {
366    /// Borrow the current snapshot. Returns an `Arc` so the caller can drop it without
367    /// holding a lock — this is the read-only fast path.
368    #[must_use]
369    pub fn snapshot(&self) -> Arc<ControllerSnapshot> {
370        self.snapshot.load_full()
371    }
372
373    /// Replace the snapshot atomically. Called by the VMM event loop on every state
374    /// transition; not exposed to handlers.
375    pub fn store_snapshot(&self, snap: ControllerSnapshot) {
376        self.snapshot.store(Arc::new(snap));
377    }
378
379    /// Validate admissibility synchronously against the cached lifecycle phase.
380    ///
381    /// Per spec § 5.2: pre-flight rejection runs before the channel is ever touched.
382    pub fn validate_phase(&self, action: &ApiAction) -> Result<(), ApiError> {
383        let phase = self.snapshot.load().phase;
384        // Two architectural rules: (a) `SendCtrlAltDel` is x86-only and rejected
385        // unconditionally (R row in the compat matrix). (b) `Shutdown` is always
386        // admissible.
387        if let ApiAction::Action(InstanceAction::SendCtrlAltDel) = action {
388            return Err(ApiError::BadRequest(
389                "Invalid action: SendCtrlAltDel is x86-only and not supported on aarch64".into(),
390            ));
391        }
392        if matches!(action, ApiAction::Shutdown) {
393            return Ok(());
394        }
395        // Ordinary admissibility: pre-boot before boot, post-boot after.
396        if phase.is_pre_boot() && !action.is_pre_boot() {
397            return Err(ApiError::BadRequest(
398                "The requested operation is not allowed before the microVM has booted".into(),
399            ));
400        }
401        if phase.is_post_boot() && !action.is_post_boot() {
402            return Err(ApiError::BadRequest(
403                "The requested operation is not supported after the microVM has booted".into(),
404            ));
405        }
406        if matches!(phase, LifecyclePhase::Starting) {
407            return Err(ApiError::BadRequest(
408                "The requested operation cannot be served during boot orchestration".into(),
409            ));
410        }
411        if matches!(phase, LifecyclePhase::Shutdown) {
412            return Err(ApiError::Internal("VMM is shut down".into()));
413        }
414        Ok(())
415    }
416
417    /// Dispatch an action to the VMM event loop. Applies the per-class timeout.
418    pub async fn dispatch(&self, action: ApiAction) -> Result<ApiResponse, ApiError> {
419        self.validate_phase(&action)?;
420        self.validate_cross_field(&action)?;
421        let class = action.class();
422        let timeout = self.timeouts.for_class(class);
423        let label = action.label();
424        // Take a peek at the action shape so we can apply the post-success counter
425        // mutation against an `ApiAction` we no longer own. Cheap because the
426        // discriminant + the small embedded fields we touch are cloneable.
427        let counter_kick = ActionCounterKick::for_action(&action);
428        let (resp_tx, resp_rx) = oneshot::channel();
429        self.vmm_tx
430            .send((action, resp_tx))
431            .await
432            .map_err(|_| ApiError::Internal("VMM event loop is gone".into()))?;
433        match tokio::time::timeout(timeout, resp_rx).await {
434            Ok(Ok(resp)) => {
435                if matches!(resp, ApiResponse::NoContent | ApiResponse::Json { .. }) {
436                    counter_kick.apply(self);
437                }
438                Ok(resp)
439            }
440            Ok(Err(_)) => Err(ApiError::Internal("VMM event loop is gone".into())),
441            Err(_) => {
442                error!(
443                    action = label,
444                    timeout_secs = timeout.as_secs(),
445                    "VMM action timed out; the action remains pending at the VMM",
446                );
447                Err(ApiError::Timeout(class.label()))
448            }
449        }
450    }
451
452    /// Borrow the underlying timeout table (used in tests).
453    #[must_use]
454    pub fn timeouts(&self) -> TimeoutTable {
455        self.timeouts
456    }
457
458    /// Borrow the action sender — used for tests that want to bypass `dispatch` to
459    /// drive the channel directly.
460    #[must_use]
461    pub fn action_sender(&self) -> ActionSender {
462        self.vmm_tx.clone()
463    }
464}
465
// Unit tests for the controller: phase admissibility, dispatch outcomes
// (ack / dropped response / timeout), cross-field limits, and default budgets.
// The panicking lints denied at file level are allowed here because tests are
// expected to unwrap.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]
mod tests {
    use squib_core::LifecyclePhase;

    use super::*;
    use crate::schemas::{BootSourceConfig, EntropyConfig, VmStateChange};

    /// Controller (channel capacity 16, spec timeouts) whose snapshot is in `phase`.
    fn ctl(phase: LifecyclePhase) -> (RuntimeApiController, ActionReceiver) {
        let mut snap = ControllerSnapshot::new("anonymous", "1.16.0", "1.16.0 (squib 0.1.0)");
        snap.phase = phase;
        snap.instance_info.state = phase.wire_state().into();
        RuntimeApiController::new(snap, TimeoutTable::from_spec(), 16)
    }

    /// Minimal validated boot-source config.
    fn boot_source() -> BootSourceConfig {
        BootSourceConfig::try_from(crate::schemas::boot_source::RawBootSourceConfig {
            kernel_image_path: "/tmp/k".into(),
            initrd_path: None,
            boot_args: None,
        })
        .unwrap()
    }

    #[test]
    fn test_should_admit_pre_boot_action_in_uninitialized() {
        let (c, _rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());
        c.validate_phase(&action).unwrap();
    }

    #[test]
    fn test_should_reject_post_boot_action_in_uninitialized() {
        let (c, _rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PatchVm(VmStateChange::Paused);
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
    }

    #[test]
    fn test_should_reject_pre_boot_action_in_running() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
    }

    #[test]
    fn test_should_admit_pause_in_running() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::PatchVm(VmStateChange::Paused);
        c.validate_phase(&action).unwrap();
    }

    #[test]
    fn test_should_reject_send_ctrl_alt_del_with_upstream_message() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::Action(InstanceAction::SendCtrlAltDel);
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("SendCtrlAltDel"));
    }

    #[test]
    fn test_should_reject_anything_in_shutdown() {
        let (c, _rx) = ctl(LifecyclePhase::Shutdown);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::Internal(_)));
    }

    #[test]
    fn test_should_reject_during_starting_phase() {
        let (c, _rx) = ctl(LifecyclePhase::Starting);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        assert!(c.validate_phase(&action).is_err());
    }

    #[tokio::test]
    async fn test_should_surface_504_on_action_timeout() {
        // Build a controller whose pre-boot timeout is 50 ms and never drain the
        // receiver — the dispatch must surface a Timeout(504).
        let mut snap = ControllerSnapshot::new("anonymous", "1.16.0", "1.16.0 (squib test)");
        snap.phase = LifecyclePhase::Uninitialized;
        snap.instance_info.state = VmState::NotStarted;
        let mut t = TimeoutTable::from_spec();
        t.pre_boot_config = Duration::from_millis(50);
        let (c, _rx) = RuntimeApiController::new(snap, t, 16);
        let action = ApiAction::PutBootSource(boot_source());
        let res = c.dispatch(action).await;
        assert!(matches!(res, Err(ApiError::Timeout(_))));
    }

    #[tokio::test]
    async fn test_should_dispatch_to_vmm_and_return_no_content() {
        let (c, mut rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());

        // Spawn a task that drains the channel and acks 204.
        tokio::spawn(async move {
            if let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(ApiResponse::NoContent);
            }
        });

        let resp = c.dispatch(action).await.unwrap();
        assert!(matches!(resp, ApiResponse::NoContent));
    }

    #[tokio::test]
    async fn test_should_surface_500_when_event_loop_drops_response() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());

        // Receive the action, then drop its oneshot sender without answering —
        // the waiting dispatch must observe the closed oneshot.
        tokio::spawn(async move {
            let mut rx = rx;
            if let Some((_action, ack)) = rx.recv().await {
                drop(ack);
            }
        });

        let res = c.dispatch(action).await;
        assert!(matches!(res, Err(ApiError::Internal(_))));
    }

    /// Validated machine config with one vCPU and `mem_mib` of memory.
    fn machine_cfg(mem_mib: u64) -> crate::schemas::MachineConfig {
        crate::schemas::MachineConfig::try_from(crate::schemas::machine_config::RawMachineConfig {
            vcpu_count: 1,
            mem_size_mib: mem_mib,
            smt: false,
            track_dirty_pages: false,
            cpu_template: None,
            huge_pages: None,
        })
        .unwrap()
    }

    /// Validated read-only, non-root drive config named `id`.
    fn drive_cfg(id: &str) -> crate::schemas::DriveConfig {
        crate::schemas::DriveConfig::try_from(crate::schemas::drive::RawDriveConfig {
            drive_id: id.into(),
            path_on_host: format!("/tmp/{id}.img"),
            is_root_device: false,
            is_read_only: true,
            cache_type: crate::schemas::drive::CacheType::Unsafe,
            io_engine: crate::schemas::drive::IoEngine::default(),
            partuuid: None,
            rate_limiter: None,
            socket: None,
        })
        .unwrap()
    }

    /// Validated balloon config requesting `amount_mib`.
    fn balloon_cfg(amount_mib: u64) -> crate::schemas::BalloonConfig {
        crate::schemas::BalloonConfig::try_from(crate::schemas::balloon::RawBalloonConfig {
            amount_mib,
            deflate_on_oom: false,
            stats_polling_interval_s: 0,
            free_page_hinting: false,
            free_page_reporting: false,
        })
        .unwrap()
    }

    #[tokio::test]
    async fn test_should_reject_machine_config_above_host_ram_cap() {
        let mut snap = ControllerSnapshot::new("test", "1.16.0", "1.16.0 (squib test)");
        snap.phase = LifecyclePhase::Uninitialized;
        let limits = LimitsState::from_host_ram_mib(256);
        let (c, _rx) =
            RuntimeApiController::new_with_limits(snap, TimeoutTable::from_spec(), 16, limits);
        let action = ApiAction::PutMachineConfig(machine_cfg(1024));
        let err = c.dispatch(action).await.unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("host RAM cap"));
    }

    #[tokio::test]
    async fn test_should_reject_balloon_above_mem_minus_32() {
        let (c, mut rx) = ctl(LifecyclePhase::Uninitialized);
        // First, set mem_size to 256 via a successful PutMachineConfig.
        tokio::spawn(async move {
            while let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(ApiResponse::NoContent);
            }
        });
        c.dispatch(ApiAction::PutMachineConfig(machine_cfg(256)))
            .await
            .unwrap();
        assert_eq!(c.limits_snapshot().mem_size_mib, Some(256));

        // 256 - 32 = 224; a 256 MiB balloon overflows by 32.
        let err = c
            .dispatch(ApiAction::PutBalloon(balloon_cfg(256)))
            .await
            .unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("exceeds max"));
    }

    #[tokio::test]
    async fn test_should_defer_balloon_cap_check_when_mem_size_not_yet_set() {
        // Without a prior PutMachineConfig the controller cannot evaluate
        // `mem_size_mib - 32`; rather than rejecting (which would diverge from
        // upstream Firecracker semantics), it defers the cap check to boot.
        let (c, mut rx) = ctl(LifecyclePhase::Uninitialized);
        tokio::spawn(async move {
            while let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(ApiResponse::NoContent);
            }
        });
        let resp = c
            .dispatch(ApiAction::PutBalloon(balloon_cfg(64)))
            .await
            .unwrap();
        assert!(matches!(resp, ApiResponse::NoContent));
    }

    #[tokio::test]
    async fn test_should_enforce_drives_class_cap_via_running_count() {
        let (c, mut rx) = ctl(LifecyclePhase::Uninitialized);
        tokio::spawn(async move {
            while let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(ApiResponse::NoContent);
            }
        });
        // Exactly MAX_DRIVES successful PUTs bump the counter to the cap. The
        // bound comes from the constant so the test tracks the real limit.
        for i in 0..MAX_DRIVES {
            c.dispatch(ApiAction::PutDrive(drive_cfg(&format!("d{i}"))))
                .await
                .unwrap();
        }
        assert_eq!(
            c.limits_snapshot().running_drives,
            u32::try_from(MAX_DRIVES).unwrap()
        );
        // One more must reject without round-tripping the channel.
        let err = c
            .dispatch(ApiAction::PutDrive(drive_cfg(&format!("d{MAX_DRIVES}"))))
            .await
            .unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("drives"));
    }

    #[tokio::test]
    async fn test_should_not_bump_running_count_on_vmm_fault() {
        let (c, mut rx) = ctl(LifecyclePhase::Uninitialized);
        tokio::spawn(async move {
            while let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(ApiResponse::Fault {
                    status: 400,
                    fault_message: "stub VMM rejected this".into(),
                });
            }
        });
        let _ = c
            .dispatch(ApiAction::PutDrive(drive_cfg("d0")))
            .await
            .unwrap();
        // The VMM faulted → running_drives must remain 0.
        assert_eq!(c.limits_snapshot().running_drives, 0);
    }

    #[test]
    fn test_should_apply_default_timeouts_per_spec() {
        let t = TimeoutTable::from_spec();
        assert_eq!(
            t.for_class(ActionClass::PreBootConfig),
            Duration::from_secs(5)
        );
        assert_eq!(
            t.for_class(ActionClass::InstanceStart),
            Duration::from_secs(30)
        );
        assert_eq!(
            t.for_class(ActionClass::SnapshotCreate),
            Duration::from_mins(5)
        );
        assert_eq!(
            t.for_class(ActionClass::SnapshotLoad),
            Duration::from_mins(5)
        );
        assert_eq!(
            t.for_class(ActionClass::VmStateChange),
            Duration::from_secs(5)
        );
        assert_eq!(
            t.for_class(ActionClass::BalloonResize),
            Duration::from_secs(30)
        );
        assert_eq!(t.for_class(ActionClass::Other), Duration::from_secs(5));
    }
}