squib_snapshot/state.rs
1//! `MicrovmState` and child types — the snapshot state blob.
2//!
3//! Per [10-data-model.md §
4//! 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob) and [16-snapshots.md
5//! § 2](../../../specs/16-snapshots.md#2-state-file). These types are the source of truth for the
6//! on-disk format; the [`crate::envelope::Snapshot`] envelope wraps an instance of [`MicrovmState`]
7//! and the bitcode encoding pins the field order.
8//!
9//! All fields are `serde`-serializable. Adding a register, a device-state field, or
10//! an MMDS V2 token field is an additive contract: the snapshot version's `minor`
11//! must bump in lockstep with upstream Firecracker (D15 + D6); fields are never
12//! removed.
13
14use std::collections::BTreeMap;
15
16use serde::{Deserialize, Serialize};
17
18use crate::error::SnapshotError;
19
20/// Per-vCPU general-purpose register file.
21///
22/// Layout per [13-arch-and-boot.md §
23/// 8](../../../specs/13-arch-and-boot.md#8-vcpu-initial-registers). Encoding is by index —
24/// `regs[0]` is X0, `regs[30]` is X30. `sp`, `pc`, `pstate` are split out for readability.
25#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
26pub struct GpRegs {
27 /// X0..X30 (31 registers).
28 pub x: [u64; 31],
29 /// SP_EL1 — used by the postcopy pre-warmer to resolve the boot stack page.
30 pub sp: u64,
31 /// PC at save time.
32 pub pc: u64,
33 /// PSTATE / SPSR_EL2.
34 pub pstate: u64,
35}
36
37/// Per-vCPU FP/SIMD register file (V0..V31, FPSR, FPCR).
38#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
39pub struct FpSimdRegs {
40 /// V0..V31 — 32 registers, each 128 bits (split into two `u64` for `serde`).
41 pub v: [[u64; 2]; 32],
42 /// FPSR — floating-point status register.
43 pub fpsr: u64,
44 /// FPCR — floating-point control register.
45 pub fpcr: u64,
46}
47
48/// PSCI affinity state for a vCPU at snapshot time.
49///
50/// On restore, every vCPU's PSCI state is normalized to BSP-running /
51/// secondaries-Off; this field is captured for diagnostics and so the restore
52/// orchestrator knows which vCPUs were live before save.
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
54pub enum PsciVcpuState {
55 /// vCPU was active.
56 On,
57 /// vCPU was parked (CPU_OFF / never CPU_ON-ed).
58 #[default]
59 Off,
60 /// vCPU is mid-bringup (CPU_ON received, primary not yet running).
61 OnPending,
62}
63
64/// Per-vCPU state blob.
65///
66/// Pin per [10-data-model.md §
67/// 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob).
68#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
69pub struct VcpuState {
70 /// MPIDR_EL1 affinity bits assigned at create time.
71 pub mpidr: u64,
72 /// General-purpose register file.
73 pub regs: GpRegs,
74 /// FP/SIMD register file.
75 pub fp_regs: FpSimdRegs,
76 /// Curated sysreg subset — `BTreeMap<u64, u64>` so the encoding is order-stable.
77 /// The key is the [`squib_arch::SysReg`]'s `as_encoded()` value (a `u64` packed
78 /// representation).
79 pub sys_regs: BTreeMap<u64, u64>,
80 /// PSCI affinity state at save time.
81 pub psci_state: PsciVcpuState,
82}
83
84impl VcpuState {
85 /// Construct a fresh `VcpuState` for the given MPIDR with all registers cleared.
86 #[must_use]
87 pub fn new(mpidr: u64) -> Self {
88 Self {
89 mpidr,
90 regs: GpRegs::default(),
91 fp_regs: FpSimdRegs::default(),
92 sys_regs: BTreeMap::new(),
93 psci_state: PsciVcpuState::Off,
94 }
95 }
96}
97
98/// Opaque GIC state blob from `hv_gic_state_get_data`.
99///
100/// The shape is HVF-specific; restoring a snapshot taken on KVM (e.g. on Linux
101/// aarch64) into squib is explicitly not supported (D10), so the bytes flow through
102/// as opaque. The `len` field is redundant with `bytes.len()` on the wire but keeps
103/// `serde_json::to_value` (used for `--describe-snapshot`) self-documenting.
104#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
105pub struct GicState {
106 /// Length of the opaque blob (bytes).
107 pub len: u64,
108 /// The blob itself.
109 pub bytes: Vec<u8>,
110}
111
112impl GicState {
113 /// Build a `GicState` from the bytes returned by `hv_gic_state_get_data`.
114 #[must_use]
115 pub fn from_bytes(bytes: Vec<u8>) -> Self {
116 // `usize::try_from` to `u64` is infallible on 64-bit, but going through it
117 // surfaces the conversion in the type system — a 32-bit target would catch
118 // the overflow at compile time instead of silently truncating at runtime.
119 let len = u64::try_from(bytes.len()).unwrap_or(u64::MAX);
120 Self { len, bytes }
121 }
122}
123
124/// MMDS data store + V2 token store, captured at snapshot time.
125///
126/// The store is held as a **JSON-serialized string** (not `serde_json::Value`)
127/// because the snapshot envelope encodes via `bitcode`, which does not implement
128/// `deserialize_any` and therefore cannot decode `serde_json::Value` directly.
129/// Use [`Self::data_value`] / [`Self::with_data`] to round-trip through a
130/// structured value.
131#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
132pub struct MmdsState {
133 /// The MMDS data store, JSON-serialized. `"null"` for an empty store.
134 pub data_json: String,
135 /// V2 session-token TTL, in seconds. `None` means MMDS V1 was active.
136 pub token_ttl_seconds: Option<u32>,
137}
138
139impl MmdsState {
140 /// Build an `MmdsState` from a structured `serde_json::Value`.
141 ///
142 /// # Errors
143 /// Surfaces any `serde_json::Error` from re-serializing the value.
144 pub fn with_data(
145 value: &serde_json::Value,
146 token_ttl_seconds: Option<u32>,
147 ) -> Result<Self, serde_json::Error> {
148 Ok(Self {
149 data_json: serde_json::to_string(value)?,
150 token_ttl_seconds,
151 })
152 }
153
154 /// Decode `data_json` back to a `serde_json::Value`.
155 ///
156 /// # Errors
157 /// Surfaces any `serde_json::Error` from parsing.
158 pub fn data_value(&self) -> Result<serde_json::Value, serde_json::Error> {
159 if self.data_json.is_empty() {
160 return Ok(serde_json::Value::Null);
161 }
162 serde_json::from_str(&self.data_json)
163 }
164}
165
166/// Top-level VM info: `mem_size_mib`, `smt`, CPU template, boot source.
167///
168/// Captured separately from per-vCPU state so the loader can validate "would this
169/// state file fit on this host" before allocating guest RAM.
170#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
171pub struct VmInfo {
172 /// Configured guest RAM in MiB (matches `machine-config.mem_size_mib`).
173 pub mem_size_mib: u64,
174 /// Always `false` on aarch64 — SMT is not exposed to guests on Apple Silicon.
175 pub smt: bool,
176 /// CPU template name (e.g. `"V1N1"`); empty if the user did not pin one.
177 pub cpu_template: String,
178 /// Boot-source `kernel_image_path` recorded for diagnostics.
179 pub kernel_image_path: String,
180 /// Boot-source `initrd_path` if any.
181 pub initrd_path: Option<String>,
182 /// Effective `boot_args` after the D23 append-if-absent rules ran.
183 pub boot_args: String,
184 /// Whether dirty-page tracking was enabled when the snapshot was taken.
185 /// A `Diff` snapshot is only legal when this is `true` (I-SNAP-6).
186 pub track_dirty_pages: bool,
187}
188
189/// One device's saved state — config + virtqueue cursors.
190///
191/// Devices serialise their own state into bitcode-friendly shapes; the snapshot
192/// crate keeps the outer `BTreeMap<DeviceKey, DeviceState>` so the order is stable
193/// across save/restore (per virtio-MMIO slot). The inner blob is opaque.
194#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
195pub struct DeviceState {
196 /// Device-class name (`"virtio-block"`, `"virtio-net"`, ...).
197 pub kind: String,
198 /// Identifier the API surface uses (e.g. `drive_id`, `iface_id`).
199 pub id: String,
200 /// virtio-MMIO slot index this device occupied.
201 pub mmio_slot: u32,
202 /// Opaque per-device state — bitcode bytes the device emits.
203 pub blob: Vec<u8>,
204}
205
206/// All device states, keyed by `(mmio_slot, id)` for stable ordering.
207#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
208pub struct DeviceStates {
209 /// Devices in MMIO-slot order.
210 pub devices: Vec<DeviceState>,
211}
212
213impl DeviceStates {
214 /// Build a `DeviceStates` from an iterable of `DeviceState`.
215 ///
216 /// The constructor sorts by `(mmio_slot, id)` so the wire encoding is stable
217 /// across HashMap iteration order and across runs.
218 pub fn from_devices<I: IntoIterator<Item = DeviceState>>(iter: I) -> Self {
219 let mut devices: Vec<_> = iter.into_iter().collect();
220 devices.sort_by(|a, b| (a.mmio_slot, a.id.as_str()).cmp(&(b.mmio_slot, b.id.as_str())));
221 Self { devices }
222 }
223}
224
225/// The snapshot state blob — the `data` field of [`crate::envelope::Snapshot`].
226///
227/// Pin per [10-data-model.md §
228/// 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob).
229#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
230pub struct MicrovmState {
231 /// Top-level VM info.
232 pub vm_info: VmInfo,
233 /// One per vCPU.
234 pub vcpu_states: Vec<VcpuState>,
235 /// Per-device state.
236 pub device_states: DeviceStates,
237 /// Opaque GIC blob.
238 pub gic_state: GicState,
239 /// MMDS state if MMDS was enabled.
240 pub mmds_state: Option<MmdsState>,
241}
242
243impl MicrovmState {
244 /// Squib-incompat sanity check — surfaces [`SnapshotError::Incompatible`] when
245 /// the loaded state could not have come from a squib-shaped VMM.
246 ///
247 /// Used by the loader after a successful magic + CRC + version check to give
248 /// `--describe-snapshot` a clean rejection of e.g. KVM-produced state files
249 /// that *happen* to deserialise structurally (D10).
250 ///
251 /// # Errors
252 /// [`SnapshotError::Incompatible`] if any structural invariant is violated.
253 pub fn verify_compatible(&self) -> Result<(), SnapshotError> {
254 // vCPU count must match the saved per-vCPU state (1..=32 per D19).
255 if self.vcpu_states.is_empty() || self.vcpu_states.len() > 32 {
256 return Err(SnapshotError::Incompatible);
257 }
258 // SMT must be false on aarch64 (D3).
259 if self.vm_info.smt {
260 return Err(SnapshotError::Incompatible);
261 }
262 // GIC blob must be non-empty for a saved-running VM.
263 if self.gic_state.bytes.is_empty() {
264 return Err(SnapshotError::Incompatible);
265 }
266 // GIC blob length redundancy must agree with the byte vector.
267 if usize::try_from(self.gic_state.len).unwrap_or(usize::MAX) != self.gic_state.bytes.len() {
268 return Err(SnapshotError::Incompatible);
269 }
270 Ok(())
271 }
272}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `value` with bitcode and decode the bytes straight back.
    fn round_trip<T: Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug>(
        value: &T,
    ) -> T {
        let bytes = bitcode::serialize(value).expect("encode");
        bitcode::deserialize(&bytes).expect("decode")
    }

    /// Smallest state that satisfies every `verify_compatible` invariant; the
    /// rejection tests each break exactly one of them.
    fn compatible_state() -> MicrovmState {
        MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 1,
                ..VmInfo::default()
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![1]),
            mmds_state: None,
        }
    }

    /// Device entry with an empty blob.
    fn device(kind: &str, id: &str, mmio_slot: u32) -> DeviceState {
        DeviceState {
            kind: kind.into(),
            id: id.into(),
            mmio_slot,
            blob: vec![],
        }
    }

    fn is_incompatible(state: &MicrovmState) -> bool {
        matches!(state.verify_compatible(), Err(SnapshotError::Incompatible))
    }

    #[test]
    fn test_should_round_trip_default_microvm_state() {
        let state = MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 256,
                smt: false,
                cpu_template: String::new(),
                kernel_image_path: "/tmp/vmlinux".into(),
                initrd_path: None,
                boot_args: "console=ttyAMA0 panic=1".into(),
                track_dirty_pages: false,
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![1, 2, 3, 4]),
            mmds_state: None,
        };
        let back = round_trip(&state);
        assert_eq!(state, back);
    }

    #[test]
    fn test_should_sort_device_states_by_slot_then_id() {
        let states = DeviceStates::from_devices(vec![
            device("virtio-net", "eth0", 2),
            device("virtio-block", "rootfs", 0),
            device("virtio-block", "data", 1),
        ]);
        assert_eq!(states.devices[0].id, "rootfs");
        assert_eq!(states.devices[1].id, "data");
        assert_eq!(states.devices[2].id, "eth0");
    }

    #[test]
    fn test_should_break_device_slot_ties_by_id() {
        let states = DeviceStates::from_devices(vec![
            device("virtio-block", "b", 1),
            device("virtio-block", "a", 1),
        ]);
        assert_eq!(states.devices[0].id, "a");
        assert_eq!(states.devices[1].id, "b");
    }

    #[test]
    fn test_should_record_gic_blob_length() {
        let gic = GicState::from_bytes(vec![1, 2, 3, 4]);
        assert_eq!(gic.len, 4);
        assert_eq!(gic.bytes, vec![1, 2, 3, 4]);
    }

    #[test]
    fn test_should_accept_minimal_compatible_state() {
        assert!(compatible_state().verify_compatible().is_ok());
    }

    #[test]
    fn test_should_accept_32_vcpus() {
        let state = MicrovmState {
            vcpu_states: (0..32).map(VcpuState::new).collect(),
            ..compatible_state()
        };
        assert!(state.verify_compatible().is_ok());
    }

    #[test]
    fn test_should_reject_more_than_32_vcpus() {
        let state = MicrovmState {
            vcpu_states: (0..33).map(VcpuState::new).collect(),
            ..compatible_state()
        };
        assert!(is_incompatible(&state));
    }

    #[test]
    fn test_should_reject_zero_vcpu_state() {
        let state = MicrovmState {
            vcpu_states: vec![],
            ..compatible_state()
        };
        assert!(is_incompatible(&state));
    }

    #[test]
    fn test_should_reject_smt_enabled() {
        let mut state = compatible_state();
        state.vm_info.smt = true;
        assert!(is_incompatible(&state));
    }

    #[test]
    fn test_should_reject_empty_gic_blob() {
        let state = MicrovmState {
            gic_state: GicState::default(),
            ..compatible_state()
        };
        assert!(is_incompatible(&state));
    }

    #[test]
    fn test_should_reject_gic_length_mismatch() {
        let mut state = MicrovmState {
            gic_state: GicState::from_bytes(vec![1, 2, 3]),
            ..compatible_state()
        };
        state.gic_state.len = 99;
        assert!(is_incompatible(&state));
    }

    #[test]
    fn test_should_decode_empty_mmds_data_as_null() {
        // `MmdsState::default()` leaves `data_json` empty; `data_value` must
        // report that as JSON null instead of a parse error.
        let value = MmdsState::default().data_value().unwrap();
        assert_eq!(value, serde_json::Value::Null);
    }

    #[test]
    fn test_should_round_trip_populated_mmds_state() {
        // Regression: an earlier draft stored `data` as `serde_json::Value`,
        // which bitcode cannot decode (requires `deserialize_any`). The smoke
        // test caught this when the demo state file refused to round-trip.
        // The fix stores MMDS as a JSON string; this test pins it.
        let mmds = MmdsState::with_data(
            &serde_json::json!({"latest": {"meta-data": {"instance-id": "demo"}}}),
            Some(3600),
        )
        .unwrap();
        let state = MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 64,
                smt: false,
                cpu_template: String::new(),
                kernel_image_path: "/k".into(),
                initrd_path: None,
                boot_args: String::new(),
                track_dirty_pages: false,
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![0xAA; 16]),
            mmds_state: Some(mmds),
        };
        let back = round_trip(&state);
        let restored = back.mmds_state.expect("MMDS round-trip dropped");
        assert_eq!(restored.token_ttl_seconds, Some(3600));
        let value = restored.data_value().unwrap();
        assert_eq!(value["latest"]["meta-data"]["instance-id"], "demo");
    }

    #[test]
    fn test_should_round_trip_psci_state() {
        for s in [
            PsciVcpuState::On,
            PsciVcpuState::Off,
            PsciVcpuState::OnPending,
        ] {
            let back = round_trip(&s);
            assert_eq!(back, s);
        }
    }
}