1#![deny(
18 clippy::unwrap_used,
19 clippy::expect_used,
20 clippy::indexing_slicing,
21 clippy::panic
22)]
23
24use std::{sync::Arc, time::Duration};
25
26use arc_swap::ArcSwap;
27use parking_lot::Mutex;
28use squib_core::LifecyclePhase;
29use tokio::sync::{mpsc, oneshot};
30use tracing::error;
31
32use crate::{
33 action::{ActionClass, ApiAction, ApiResponse},
34 error::ApiError,
35 schemas::{InstanceAction, InstanceInfo, MAX_DRIVES, MAX_NICS, MAX_PMEM, VmState},
36};
37
/// Per-class timeout budgets: one [`Duration`] for every [`ActionClass`] variant.
///
/// [`TimeoutTable::for_class`] maps a class to the matching field.
#[derive(Debug, Clone, Copy)]
pub struct TimeoutTable {
    /// Budget for [`ActionClass::PreBootConfig`] actions.
    pub pre_boot_config: Duration,
    /// Budget for [`ActionClass::InstanceStart`] actions.
    pub instance_start: Duration,
    /// Budget for [`ActionClass::SnapshotCreate`] actions.
    pub snapshot_create: Duration,
    /// Budget for [`ActionClass::SnapshotLoad`] actions.
    pub snapshot_load: Duration,
    /// Budget for [`ActionClass::VmStateChange`] actions.
    pub vm_state_change: Duration,
    /// Budget for [`ActionClass::BalloonResize`] actions.
    pub balloon_resize: Duration,
    /// Budget for [`ActionClass::Other`] actions.
    pub other: Duration,
}
57
58impl TimeoutTable {
59 #[must_use]
61 pub const fn from_spec() -> Self {
62 Self {
63 pre_boot_config: Duration::from_secs(5),
64 instance_start: Duration::from_secs(30),
65 snapshot_create: Duration::from_mins(5),
66 snapshot_load: Duration::from_mins(5),
67 vm_state_change: Duration::from_secs(5),
68 balloon_resize: Duration::from_secs(30),
69 other: Duration::from_secs(5),
70 }
71 }
72
73 #[must_use]
75 pub const fn for_class(&self, class: ActionClass) -> Duration {
76 match class {
77 ActionClass::PreBootConfig => self.pre_boot_config,
78 ActionClass::InstanceStart => self.instance_start,
79 ActionClass::SnapshotCreate => self.snapshot_create,
80 ActionClass::SnapshotLoad => self.snapshot_load,
81 ActionClass::VmStateChange => self.vm_state_change,
82 ActionClass::BalloonResize => self.balloon_resize,
83 ActionClass::Other => self.other,
84 }
85 }
86}
87
88impl Default for TimeoutTable {
89 fn default() -> Self {
90 Self::from_spec()
91 }
92}
93
/// State bundle published by the controller and read back through
/// [`RuntimeApiController::snapshot`].
#[derive(Debug, Clone)]
pub struct ControllerSnapshot {
    /// Instance description (id, state, VMM version, application name).
    pub instance_info: InstanceInfo,
    /// Firecracker version string supplied when the snapshot is built.
    pub firecracker_version: String,
    /// VM configuration as a JSON value; [`ControllerSnapshot::new`] starts it
    /// as an empty object.
    pub vm_config: Arc<serde_json::Value>,
    /// Lifecycle phase consulted by [`RuntimeApiController::validate_phase`].
    pub phase: LifecyclePhase,
}
107
108impl ControllerSnapshot {
109 pub fn new(
111 instance_id: impl Into<String>,
112 firecracker_version: impl Into<String>,
113 vmm_version: impl Into<String>,
114 ) -> Self {
115 let firecracker_version = firecracker_version.into();
116 Self {
117 instance_info: InstanceInfo {
118 id: instance_id.into(),
119 state: VmState::NotStarted,
120 vmm_version: vmm_version.into(),
121 app_name: "Firecracker".into(),
122 },
123 firecracker_version,
124 vm_config: Arc::new(serde_json::json!({})),
125 phase: LifecyclePhase::Uninitialized,
126 }
127 }
128}
129
/// Sending half of the action queue. Each item pairs an [`ApiAction`] with the
/// one-shot sender on which the [`ApiResponse`] is delivered back.
pub type ActionSender = mpsc::Sender<(ApiAction, oneshot::Sender<ApiResponse>)>;
132
/// Receiving half of the action queue, handed out by the
/// [`RuntimeApiController`] constructors.
pub type ActionReceiver = mpsc::Receiver<(ApiAction, oneshot::Sender<ApiResponse>)>;
135
/// Mutable bookkeeping used for cross-field validation of incoming actions.
#[derive(Debug)]
pub struct LimitsState {
    /// Upper bound, in MiB, accepted for a machine config's `mem_size_mib`.
    pub host_ram_mib: u64,
    /// Memory size recorded from the last accepted machine config; `None`
    /// until one has been accepted.
    pub mem_size_mib: Option<u64>,
    /// Number of drive PUTs that have been accepted so far.
    pub running_drives: u32,
    /// Number of network-interface PUTs that have been accepted so far.
    pub running_nics: u32,
    /// Number of pmem PUTs that have been accepted so far.
    pub running_pmem: u32,
}
159
160impl LimitsState {
161 #[must_use]
165 pub fn from_host_ram_mib(host_ram_mib: u64) -> Self {
166 Self {
167 host_ram_mib,
168 mem_size_mib: None,
169 running_drives: 0,
170 running_nics: 0,
171 running_pmem: 0,
172 }
173 }
174}
175
176impl Default for LimitsState {
177 fn default() -> Self {
178 Self::from_host_ram_mib(1024 * 1024)
182 }
183}
184
/// Front door for API actions: validates them, forwards them to the VMM over
/// the action queue and waits for the reply within a per-class timeout.
#[derive(Debug)]
pub struct RuntimeApiController {
    /// Currently published snapshot; replaced wholesale by `store_snapshot`.
    snapshot: ArcSwap<ControllerSnapshot>,
    /// Sending half of the action queue.
    vmm_tx: ActionSender,
    /// Per-class timeout budgets applied by `dispatch`.
    timeouts: TimeoutTable,
    /// Counters and caps used for cross-field validation.
    limits: Mutex<LimitsState>,
}
194
195impl RuntimeApiController {
196 #[must_use]
201 pub fn new(
202 snapshot: ControllerSnapshot,
203 timeouts: TimeoutTable,
204 capacity: usize,
205 ) -> (Self, ActionReceiver) {
206 Self::new_with_limits(snapshot, timeouts, capacity, LimitsState::default())
207 }
208
209 #[must_use]
213 pub fn new_with_limits(
214 snapshot: ControllerSnapshot,
215 timeouts: TimeoutTable,
216 capacity: usize,
217 limits: LimitsState,
218 ) -> (Self, ActionReceiver) {
219 let (tx, rx) = mpsc::channel(capacity);
220 let controller = Self {
221 snapshot: ArcSwap::from(Arc::new(snapshot)),
222 vmm_tx: tx,
223 timeouts,
224 limits: Mutex::new(limits),
225 };
226 (controller, rx)
227 }
228
229 #[must_use]
232 pub fn limits_snapshot(&self) -> LimitsSnapshot {
233 let g = self.limits.lock();
234 LimitsSnapshot {
235 host_ram_mib: g.host_ram_mib,
236 mem_size_mib: g.mem_size_mib,
237 running_drives: g.running_drives,
238 running_nics: g.running_nics,
239 running_pmem: g.running_pmem,
240 }
241 }
242
243 fn validate_cross_field(&self, action: &ApiAction) -> Result<(), ApiError> {
246 let g = self.limits.lock();
247 match action {
248 ApiAction::PutMachineConfig(cfg) => {
249 let req = cfg.mem_size_mib.get();
250 if req > g.host_ram_mib {
251 return Err(ApiError::BadRequest(format!(
252 "mem_size_mib={req} exceeds host RAM cap of {host} MiB",
253 host = g.host_ram_mib,
254 )));
255 }
256 }
257 ApiAction::PutBalloon(b) => {
258 cross_check_balloon(b.amount_mib, g.mem_size_mib)?;
259 }
260 ApiAction::PatchBalloon(u) => {
261 cross_check_balloon(u.amount_mib, g.mem_size_mib)?;
262 }
263 ApiAction::PutDrive(_) if u64::from(g.running_drives) >= MAX_DRIVES_AS_U64 => {
264 return Err(ApiError::BadRequest(format!(
265 "drives: per-class cap {MAX_DRIVES} exceeded"
266 )));
267 }
268 ApiAction::PutNetwork(_) if u64::from(g.running_nics) >= MAX_NICS_AS_U64 => {
269 return Err(ApiError::BadRequest(format!(
270 "network_interfaces: per-class cap {MAX_NICS} exceeded"
271 )));
272 }
273 ApiAction::PutPmem(_) if u64::from(g.running_pmem) >= MAX_PMEM_AS_U64 => {
274 return Err(ApiError::BadRequest(format!(
275 "pmem: per-class cap {MAX_PMEM} exceeded"
276 )));
277 }
278 _ => {}
279 }
280 Ok(())
281 }
282}
283
/// Bookkeeping update to apply to [`LimitsState`] once an action has been
/// accepted by the VMM.
#[derive(Debug, Clone, Copy)]
enum ActionCounterKick {
    /// The action does not affect the limits bookkeeping.
    None,
    /// Record the given memory size (MiB) from a machine config.
    SetMemSize(u64),
    /// Count one more drive.
    AddDrive,
    /// Count one more network interface.
    AddNic,
    /// Count one more pmem device.
    AddPmem,
}
296
297impl ActionCounterKick {
298 fn for_action(action: &ApiAction) -> Self {
299 match action {
300 ApiAction::PutMachineConfig(cfg) => Self::SetMemSize(cfg.mem_size_mib.get()),
301 ApiAction::PutDrive(_) => Self::AddDrive,
302 ApiAction::PutNetwork(_) => Self::AddNic,
303 ApiAction::PutPmem(_) => Self::AddPmem,
304 _ => Self::None,
305 }
306 }
307
308 fn apply(self, ctl: &RuntimeApiController) {
309 if matches!(self, Self::None) {
310 return;
311 }
312 let mut g = ctl.limits.lock();
313 match self {
314 Self::SetMemSize(v) => g.mem_size_mib = Some(v),
315 Self::AddDrive => g.running_drives = g.running_drives.saturating_add(1),
316 Self::AddNic => g.running_nics = g.running_nics.saturating_add(1),
317 Self::AddPmem => g.running_pmem = g.running_pmem.saturating_add(1),
318 Self::None => {}
319 }
320 }
321}
322
/// Plain-data copy of the limits bookkeeping, as returned by
/// [`RuntimeApiController::limits_snapshot`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LimitsSnapshot {
    /// Host RAM cap in MiB at the time of the snapshot.
    pub host_ram_mib: u64,
    /// Recorded machine-config memory size in MiB, if any.
    pub mem_size_mib: Option<u64>,
    /// Drive counter at the time of the snapshot.
    pub running_drives: u32,
    /// Network-interface counter at the time of the snapshot.
    pub running_nics: u32,
    /// Pmem counter at the time of the snapshot.
    pub running_pmem: u32,
}
337
// Per-class caps converted to `u64` once, so the cap checks in
// `validate_cross_field` can compare them against `u64::from(counter)`.
// NOTE(review): `as` is used because the conversion happens in a const
// context; this assumes the schema constants fit in `u64` — confirm against
// their declared type.
const MAX_DRIVES_AS_U64: u64 = MAX_DRIVES as u64;
const MAX_NICS_AS_U64: u64 = MAX_NICS as u64;
const MAX_PMEM_AS_U64: u64 = MAX_PMEM as u64;
344
345fn cross_check_balloon(amount_mib: u64, mem_size_mib: Option<u64>) -> Result<(), ApiError> {
346 let Some(mem) = mem_size_mib else {
351 return Ok(());
352 };
353 let max_balloon = mem.saturating_sub(32);
357 if amount_mib > max_balloon {
358 return Err(ApiError::BadRequest(format!(
359 "balloon amount_mib={amount_mib} exceeds max ({max_balloon} = mem_size_mib {mem} - 32)"
360 )));
361 }
362 Ok(())
363}
364
365impl RuntimeApiController {
366 #[must_use]
369 pub fn snapshot(&self) -> Arc<ControllerSnapshot> {
370 self.snapshot.load_full()
371 }
372
373 pub fn store_snapshot(&self, snap: ControllerSnapshot) {
376 self.snapshot.store(Arc::new(snap));
377 }
378
379 pub fn validate_phase(&self, action: &ApiAction) -> Result<(), ApiError> {
383 let phase = self.snapshot.load().phase;
384 if let ApiAction::Action(InstanceAction::SendCtrlAltDel) = action {
388 return Err(ApiError::BadRequest(
389 "Invalid action: SendCtrlAltDel is x86-only and not supported on aarch64".into(),
390 ));
391 }
392 if matches!(action, ApiAction::Shutdown) {
393 return Ok(());
394 }
395 if phase.is_pre_boot() && !action.is_pre_boot() {
397 return Err(ApiError::BadRequest(
398 "The requested operation is not allowed before the microVM has booted".into(),
399 ));
400 }
401 if phase.is_post_boot() && !action.is_post_boot() {
402 return Err(ApiError::BadRequest(
403 "The requested operation is not supported after the microVM has booted".into(),
404 ));
405 }
406 if matches!(phase, LifecyclePhase::Starting) {
407 return Err(ApiError::BadRequest(
408 "The requested operation cannot be served during boot orchestration".into(),
409 ));
410 }
411 if matches!(phase, LifecyclePhase::Shutdown) {
412 return Err(ApiError::Internal("VMM is shut down".into()));
413 }
414 Ok(())
415 }
416
417 pub async fn dispatch(&self, action: ApiAction) -> Result<ApiResponse, ApiError> {
419 self.validate_phase(&action)?;
420 self.validate_cross_field(&action)?;
421 let class = action.class();
422 let timeout = self.timeouts.for_class(class);
423 let label = action.label();
424 let counter_kick = ActionCounterKick::for_action(&action);
428 let (resp_tx, resp_rx) = oneshot::channel();
429 self.vmm_tx
430 .send((action, resp_tx))
431 .await
432 .map_err(|_| ApiError::Internal("VMM event loop is gone".into()))?;
433 match tokio::time::timeout(timeout, resp_rx).await {
434 Ok(Ok(resp)) => {
435 if matches!(resp, ApiResponse::NoContent | ApiResponse::Json { .. }) {
436 counter_kick.apply(self);
437 }
438 Ok(resp)
439 }
440 Ok(Err(_)) => Err(ApiError::Internal("VMM event loop is gone".into())),
441 Err(_) => {
442 error!(
443 action = label,
444 timeout_secs = timeout.as_secs(),
445 "VMM action timed out; the action remains pending at the VMM",
446 );
447 Err(ApiError::Timeout(class.label()))
448 }
449 }
450 }
451
452 #[must_use]
454 pub fn timeouts(&self) -> TimeoutTable {
455 self.timeouts
456 }
457
458 #[must_use]
461 pub fn action_sender(&self) -> ActionSender {
462 self.vmm_tx.clone()
463 }
464}
465
// Test code may unwrap, index and panic freely: a failure here is a failed
// test, not a production crash.
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]
mod tests {
    use squib_core::LifecyclePhase;

    use super::*;
    use crate::schemas::{BootSourceConfig, EntropyConfig, VmStateChange};

    /// Builds a controller published in `phase`, with the spec timeouts and a
    /// 16-slot action queue.
    fn ctl(phase: LifecyclePhase) -> (RuntimeApiController, ActionReceiver) {
        let mut snap = ControllerSnapshot::new("anonymous", "1.16.0", "1.16.0 (squib 0.1.0)");
        snap.phase = phase;
        snap.instance_info.state = phase.wire_state().into();
        RuntimeApiController::new(snap, TimeoutTable::from_spec(), 16)
    }

    /// Spawns a stub VMM that answers every received action with `reply()`.
    ///
    /// Must be called from within a Tokio runtime.
    fn spawn_stub_vmm(mut rx: ActionReceiver, reply: fn() -> ApiResponse) {
        tokio::spawn(async move {
            while let Some((_action, ack)) = rx.recv().await {
                let _ = ack.send(reply());
            }
        });
    }

    /// Minimal valid boot-source config.
    fn boot_source() -> BootSourceConfig {
        BootSourceConfig::try_from(crate::schemas::boot_source::RawBootSourceConfig {
            kernel_image_path: "/tmp/k".into(),
            initrd_path: None,
            boot_args: None,
        })
        .unwrap()
    }

    #[test]
    fn test_should_admit_pre_boot_action_in_uninitialized() {
        let (c, _rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());
        c.validate_phase(&action).unwrap();
    }

    #[test]
    fn test_should_reject_post_boot_action_in_uninitialized() {
        let (c, _rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PatchVm(VmStateChange::Paused);
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
    }

    #[test]
    fn test_should_reject_pre_boot_action_in_running() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
    }

    #[test]
    fn test_should_admit_pause_in_running() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::PatchVm(VmStateChange::Paused);
        c.validate_phase(&action).unwrap();
    }

    #[test]
    fn test_should_reject_send_ctrl_alt_del_with_upstream_message() {
        let (c, _rx) = ctl(LifecyclePhase::Running);
        let action = ApiAction::Action(InstanceAction::SendCtrlAltDel);
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("SendCtrlAltDel"));
    }

    #[test]
    fn test_should_reject_anything_in_shutdown() {
        let (c, _rx) = ctl(LifecyclePhase::Shutdown);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        let err = c.validate_phase(&action).unwrap_err();
        assert!(matches!(err, ApiError::Internal(_)));
    }

    #[test]
    fn test_should_reject_during_starting_phase() {
        let (c, _rx) = ctl(LifecyclePhase::Starting);
        let action = ApiAction::PutEntropy(EntropyConfig::default());
        assert!(c.validate_phase(&action).is_err());
    }

    #[tokio::test]
    async fn test_should_surface_504_on_action_timeout() {
        let mut snap = ControllerSnapshot::new("anonymous", "1.16.0", "1.16.0 (squib test)");
        snap.phase = LifecyclePhase::Uninitialized;
        snap.instance_info.state = VmState::NotStarted;
        let mut t = TimeoutTable::from_spec();
        t.pre_boot_config = Duration::from_millis(50);
        // `_rx` is kept alive but never drained, so the action is enqueued
        // and no reply ever arrives.
        let (c, _rx) = RuntimeApiController::new(snap, t, 16);
        let action = ApiAction::PutBootSource(boot_source());
        let res = c.dispatch(action).await;
        assert!(matches!(res, Err(ApiError::Timeout(_))));
    }

    #[tokio::test]
    async fn test_should_dispatch_to_vmm_and_return_no_content() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());
        spawn_stub_vmm(rx, || ApiResponse::NoContent);

        let resp = c.dispatch(action).await.unwrap();
        assert!(matches!(resp, ApiResponse::NoContent));
    }

    #[tokio::test]
    async fn test_should_surface_500_when_event_loop_drops_response() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        let action = ApiAction::PutBootSource(boot_source());

        tokio::spawn(async move {
            let mut rx = rx;
            if let Some((_action, ack)) = rx.recv().await {
                // Dropping the reply sender without answering closes the
                // one-shot channel on the dispatcher's side.
                drop(ack);
            }
        });

        let res = c.dispatch(action).await;
        assert!(matches!(res, Err(ApiError::Internal(_))));
    }

    /// Machine config with one vCPU and `mem_mib` MiB of memory.
    fn machine_cfg(mem_mib: u64) -> crate::schemas::MachineConfig {
        crate::schemas::MachineConfig::try_from(crate::schemas::machine_config::RawMachineConfig {
            vcpu_count: 1,
            mem_size_mib: mem_mib,
            smt: false,
            track_dirty_pages: false,
            cpu_template: None,
            huge_pages: None,
        })
        .unwrap()
    }

    /// Read-only, non-root drive config identified by `id`.
    fn drive_cfg(id: &str) -> crate::schemas::DriveConfig {
        crate::schemas::DriveConfig::try_from(crate::schemas::drive::RawDriveConfig {
            drive_id: id.into(),
            path_on_host: format!("/tmp/{id}.img"),
            is_root_device: false,
            is_read_only: true,
            cache_type: crate::schemas::drive::CacheType::Unsafe,
            io_engine: crate::schemas::drive::IoEngine::default(),
            partuuid: None,
            rate_limiter: None,
            socket: None,
        })
        .unwrap()
    }

    /// Balloon config targeting `amount_mib` MiB with every option disabled.
    fn balloon_cfg(amount_mib: u64) -> crate::schemas::BalloonConfig {
        crate::schemas::BalloonConfig::try_from(crate::schemas::balloon::RawBalloonConfig {
            amount_mib,
            deflate_on_oom: false,
            stats_polling_interval_s: 0,
            free_page_hinting: false,
            free_page_reporting: false,
        })
        .unwrap()
    }

    #[tokio::test]
    async fn test_should_reject_machine_config_above_host_ram_cap() {
        let mut snap = ControllerSnapshot::new("test", "1.16.0", "1.16.0 (squib test)");
        snap.phase = LifecyclePhase::Uninitialized;
        let limits = LimitsState::from_host_ram_mib(256);
        let (c, _rx) =
            RuntimeApiController::new_with_limits(snap, TimeoutTable::from_spec(), 16, limits);
        let action = ApiAction::PutMachineConfig(machine_cfg(1024));
        let err = c.dispatch(action).await.unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("host RAM cap"));
    }

    #[tokio::test]
    async fn test_should_reject_balloon_above_mem_minus_32() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        spawn_stub_vmm(rx, || ApiResponse::NoContent);
        c.dispatch(ApiAction::PutMachineConfig(machine_cfg(256)))
            .await
            .unwrap();
        assert_eq!(c.limits_snapshot().mem_size_mib, Some(256));

        let err = c
            .dispatch(ApiAction::PutBalloon(balloon_cfg(256)))
            .await
            .unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("exceeds max"));
    }

    #[tokio::test]
    async fn test_should_defer_balloon_cap_check_when_mem_size_not_yet_set() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        spawn_stub_vmm(rx, || ApiResponse::NoContent);
        let resp = c
            .dispatch(ApiAction::PutBalloon(balloon_cfg(64)))
            .await
            .unwrap();
        assert!(matches!(resp, ApiResponse::NoContent));
    }

    #[tokio::test]
    async fn test_should_enforce_drives_class_cap_via_running_count() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        spawn_stub_vmm(rx, || ApiResponse::NoContent);
        // Fill the class exactly up to its cap; tied to the constant so the
        // test follows the schema if the cap ever changes.
        for i in 0..MAX_DRIVES {
            c.dispatch(ApiAction::PutDrive(drive_cfg(&format!("d{i}"))))
                .await
                .unwrap();
        }
        assert_eq!(
            u64::from(c.limits_snapshot().running_drives),
            MAX_DRIVES_AS_U64
        );
        let err = c
            .dispatch(ApiAction::PutDrive(drive_cfg(&format!("d{MAX_DRIVES}"))))
            .await
            .unwrap_err();
        assert!(matches!(err, ApiError::BadRequest(_)));
        assert!(err.fault_message().contains("drives"));
    }

    #[tokio::test]
    async fn test_should_not_bump_running_count_on_vmm_fault() {
        let (c, rx) = ctl(LifecyclePhase::Uninitialized);
        spawn_stub_vmm(rx, || ApiResponse::Fault {
            status: 400,
            fault_message: "stub VMM rejected this".into(),
        });
        let _ = c
            .dispatch(ApiAction::PutDrive(drive_cfg("d0")))
            .await
            .unwrap();
        assert_eq!(c.limits_snapshot().running_drives, 0);
    }

    #[test]
    fn test_should_apply_default_timeouts_per_spec() {
        let t = TimeoutTable::from_spec();
        assert_eq!(
            t.for_class(ActionClass::PreBootConfig),
            Duration::from_secs(5)
        );
        assert_eq!(
            t.for_class(ActionClass::InstanceStart),
            Duration::from_secs(30)
        );
        assert_eq!(
            t.for_class(ActionClass::SnapshotCreate),
            Duration::from_mins(5)
        );
        assert_eq!(
            t.for_class(ActionClass::SnapshotLoad),
            Duration::from_mins(5)
        );
        assert_eq!(
            t.for_class(ActionClass::VmStateChange),
            Duration::from_secs(5)
        );
        assert_eq!(
            t.for_class(ActionClass::BalloonResize),
            Duration::from_secs(30)
        );
        assert_eq!(t.for_class(ActionClass::Other), Duration::from_secs(5));
    }
}