Skip to main content

squib_snapshot/
state.rs

1//! `MicrovmState` and child types — the snapshot state blob.
2//!
3//! Per [10-data-model.md §
4//! 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob) and [16-snapshots.md
5//! § 2](../../../specs/16-snapshots.md#2-state-file). These types are the source of truth for the
6//! on-disk format; the [`crate::envelope::Snapshot`] envelope wraps an instance of [`MicrovmState`]
7//! and the bitcode encoding pins the field order.
8//!
9//! All fields are `serde`-serializable. Adding a register, a device-state field, or
10//! an MMDS V2 token field is an additive contract: the snapshot version's `minor`
11//! must bump in lockstep with upstream Firecracker (D15 + D6); fields are never
12//! removed.
13
14use std::collections::BTreeMap;
15
16use serde::{Deserialize, Serialize};
17
18use crate::error::SnapshotError;
19
/// Per-vCPU general-purpose register file.
///
/// Layout per [13-arch-and-boot.md §
/// 8](../../../specs/13-arch-and-boot.md#8-vcpu-initial-registers). Encoding is by index —
/// `x[0]` is X0, `x[30]` is X30. `sp`, `pc`, `pstate` are split out for readability.
///
/// The derived `Default` yields an all-zero register file. The field order below is
/// part of the on-disk format (the bitcode encoding pins it), so new fields are
/// appended rather than inserted.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct GpRegs {
    /// X0..X30 (31 registers).
    pub x: [u64; 31],
    /// SP_EL1 — used by the postcopy pre-warmer to resolve the boot stack page.
    pub sp: u64,
    /// PC at save time.
    pub pc: u64,
    /// PSTATE / SPSR_EL2.
    pub pstate: u64,
}
36
/// Per-vCPU FP/SIMD register file (V0..V31, FPSR, FPCR).
///
/// The derived `Default` yields an all-zero register file.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct FpSimdRegs {
    /// V0..V31 — 32 registers, each 128 bits (split into two `u64` for `serde`).
    ///
    /// NOTE(review): which of the two `u64` elements holds the low 64 bits is not
    /// established in this file — confirm against the code that fills this array.
    pub v: [[u64; 2]; 32],
    /// FPSR — floating-point status register.
    pub fpsr: u64,
    /// FPCR — floating-point control register.
    pub fpcr: u64,
}
47
/// PSCI affinity state for a vCPU at snapshot time.
///
/// On restore, every vCPU's PSCI state is normalized to BSP-running /
/// secondaries-Off; this field is captured for diagnostics and so the restore
/// orchestrator knows which vCPUs were live before save.
///
/// The default is [`PsciVcpuState::Off`].
///
/// NOTE(review): the variant order is presumably part of the encoded form (serde
/// identifies variants by index in non-self-describing formats) — append new
/// variants rather than reordering; confirm against the bitcode encoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum PsciVcpuState {
    /// vCPU was active.
    On,
    /// vCPU was parked (CPU_OFF / never CPU_ON-ed).
    #[default]
    Off,
    /// vCPU is mid-bringup (CPU_ON received, primary not yet running).
    OnPending,
}
63
/// Per-vCPU state blob.
///
/// Pin per [10-data-model.md §
/// 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob).
///
/// `Default` is deliberately not derived here; use [`VcpuState::new`] to build a
/// cleared state for a given MPIDR.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VcpuState {
    /// MPIDR_EL1 affinity bits assigned at create time.
    pub mpidr: u64,
    /// General-purpose register file.
    pub regs: GpRegs,
    /// FP/SIMD register file.
    pub fp_regs: FpSimdRegs,
    /// Curated sysreg subset — `BTreeMap<u64, u64>` so the encoding is order-stable.
    /// The key is the [`squib_arch::SysReg`]'s `as_encoded()` value (a `u64` packed
    /// representation); the map value is the register's contents.
    pub sys_regs: BTreeMap<u64, u64>,
    /// PSCI affinity state at save time.
    pub psci_state: PsciVcpuState,
}
83
84impl VcpuState {
85    /// Construct a fresh `VcpuState` for the given MPIDR with all registers cleared.
86    #[must_use]
87    pub fn new(mpidr: u64) -> Self {
88        Self {
89            mpidr,
90            regs: GpRegs::default(),
91            fp_regs: FpSimdRegs::default(),
92            sys_regs: BTreeMap::new(),
93            psci_state: PsciVcpuState::Off,
94        }
95    }
96}
97
/// Opaque GIC state blob from `hv_gic_state_get_data`.
///
/// The shape is HVF-specific; restoring a snapshot taken on KVM (e.g. on Linux
/// aarch64) into squib is explicitly not supported (D10), so the bytes flow through
/// as opaque. The `len` field is redundant with `bytes.len()` on the wire but keeps
/// `serde_json::to_value` (used for `--describe-snapshot`) self-documenting.
///
/// Both fields are public, so `len` can drift from `bytes.len()` if they are set
/// independently; [`MicrovmState::verify_compatible`] rejects such a state. The
/// derived `Default` is an empty blob with `len == 0`.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct GicState {
    /// Length of the opaque blob (bytes).
    pub len: u64,
    /// The blob itself.
    pub bytes: Vec<u8>,
}
111
112impl GicState {
113    /// Build a `GicState` from the bytes returned by `hv_gic_state_get_data`.
114    #[must_use]
115    pub fn from_bytes(bytes: Vec<u8>) -> Self {
116        // `usize::try_from` to `u64` is infallible on 64-bit, but going through it
117        // surfaces the conversion in the type system — a 32-bit target would catch
118        // the overflow at compile time instead of silently truncating at runtime.
119        let len = u64::try_from(bytes.len()).unwrap_or(u64::MAX);
120        Self { len, bytes }
121    }
122}
123
/// MMDS data store + V2 token store, captured at snapshot time.
///
/// The store is held as a **JSON-serialized string** (not `serde_json::Value`)
/// because the snapshot envelope encodes via `bitcode`, which does not implement
/// `deserialize_any` and therefore cannot decode `serde_json::Value` directly.
/// Use [`Self::data_value`] / [`Self::with_data`] to round-trip through a
/// structured value.
///
/// The derived `Default` leaves `data_json` as the empty string (not `"null"`);
/// [`Self::data_value`] treats that empty string as JSON `null`.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct MmdsState {
    /// The MMDS data store, JSON-serialized. `"null"` for an empty store.
    pub data_json: String,
    /// V2 session-token TTL, in seconds. `None` means MMDS V1 was active.
    pub token_ttl_seconds: Option<u32>,
}
138
139impl MmdsState {
140    /// Build an `MmdsState` from a structured `serde_json::Value`.
141    ///
142    /// # Errors
143    /// Surfaces any `serde_json::Error` from re-serializing the value.
144    pub fn with_data(
145        value: &serde_json::Value,
146        token_ttl_seconds: Option<u32>,
147    ) -> Result<Self, serde_json::Error> {
148        Ok(Self {
149            data_json: serde_json::to_string(value)?,
150            token_ttl_seconds,
151        })
152    }
153
154    /// Decode `data_json` back to a `serde_json::Value`.
155    ///
156    /// # Errors
157    /// Surfaces any `serde_json::Error` from parsing.
158    pub fn data_value(&self) -> Result<serde_json::Value, serde_json::Error> {
159        if self.data_json.is_empty() {
160            return Ok(serde_json::Value::Null);
161        }
162        serde_json::from_str(&self.data_json)
163    }
164}
165
/// Top-level VM info: `mem_size_mib`, `smt`, CPU template, boot source, and the
/// dirty-page-tracking flag.
///
/// Captured separately from per-vCPU state so the loader can validate "would this
/// state file fit on this host" before allocating guest RAM.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct VmInfo {
    /// Configured guest RAM in MiB (matches `machine-config.mem_size_mib`).
    pub mem_size_mib: u64,
    /// Always `false` on aarch64 — SMT is not exposed to guests on Apple Silicon.
    /// [`MicrovmState::verify_compatible`] rejects a state where this is `true`.
    pub smt: bool,
    /// CPU template name (e.g. `"V1N1"`); empty if the user did not pin one.
    pub cpu_template: String,
    /// Boot-source `kernel_image_path` recorded for diagnostics.
    pub kernel_image_path: String,
    /// Boot-source `initrd_path` if any.
    pub initrd_path: Option<String>,
    /// Effective `boot_args` after the D23 append-if-absent rules ran.
    pub boot_args: String,
    /// Whether dirty-page tracking was enabled when the snapshot was taken.
    /// A `Diff` snapshot is only legal when this is `true` (I-SNAP-6).
    pub track_dirty_pages: bool,
}
188
/// One device's saved state — config + virtqueue cursors.
///
/// Devices serialise their own state into bitcode-friendly shapes; the snapshot
/// crate keeps them in the [`DeviceStates`] container, ordered by
/// `(mmio_slot, id)`, so the order is stable across save/restore (per virtio-MMIO
/// slot). The inner blob is opaque.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DeviceState {
    /// Device-class name (`"virtio-block"`, `"virtio-net"`, ...).
    pub kind: String,
    /// Identifier the API surface uses (e.g. `drive_id`, `iface_id`).
    pub id: String,
    /// virtio-MMIO slot index this device occupied.
    pub mmio_slot: u32,
    /// Opaque per-device state — bitcode bytes the device emits.
    pub blob: Vec<u8>,
}
205
/// All device states, ordered by `(mmio_slot, id)` so the encoding is stable.
///
/// The ordering is established by [`DeviceStates::from_devices`]. `devices` is a
/// plain public `Vec`, so entries pushed or edited afterwards are not re-sorted.
/// The derived `Default` is an empty list.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct DeviceStates {
    /// Devices in MMIO-slot order.
    pub devices: Vec<DeviceState>,
}
212
213impl DeviceStates {
214    /// Build a `DeviceStates` from an iterable of `DeviceState`.
215    ///
216    /// The constructor sorts by `(mmio_slot, id)` so the wire encoding is stable
217    /// across HashMap iteration order and across runs.
218    pub fn from_devices<I: IntoIterator<Item = DeviceState>>(iter: I) -> Self {
219        let mut devices: Vec<_> = iter.into_iter().collect();
220        devices.sort_by(|a, b| (a.mmio_slot, a.id.as_str()).cmp(&(b.mmio_slot, b.id.as_str())));
221        Self { devices }
222    }
223}
224
225/// The snapshot state blob — the `data` field of [`crate::envelope::Snapshot`].
226///
227/// Pin per [10-data-model.md §
228/// 5](../../../specs/10-data-model.md#5-microvmstate--the-snapshot-state-blob).
229#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
230pub struct MicrovmState {
231    /// Top-level VM info.
232    pub vm_info: VmInfo,
233    /// One per vCPU.
234    pub vcpu_states: Vec<VcpuState>,
235    /// Per-device state.
236    pub device_states: DeviceStates,
237    /// Opaque GIC blob.
238    pub gic_state: GicState,
239    /// MMDS state if MMDS was enabled.
240    pub mmds_state: Option<MmdsState>,
241}
242
243impl MicrovmState {
244    /// Squib-incompat sanity check — surfaces [`SnapshotError::Incompatible`] when
245    /// the loaded state could not have come from a squib-shaped VMM.
246    ///
247    /// Used by the loader after a successful magic + CRC + version check to give
248    /// `--describe-snapshot` a clean rejection of e.g. KVM-produced state files
249    /// that *happen* to deserialise structurally (D10).
250    ///
251    /// # Errors
252    /// [`SnapshotError::Incompatible`] if any structural invariant is violated.
253    pub fn verify_compatible(&self) -> Result<(), SnapshotError> {
254        // vCPU count must match the saved per-vCPU state (1..=32 per D19).
255        if self.vcpu_states.is_empty() || self.vcpu_states.len() > 32 {
256            return Err(SnapshotError::Incompatible);
257        }
258        // SMT must be false on aarch64 (D3).
259        if self.vm_info.smt {
260            return Err(SnapshotError::Incompatible);
261        }
262        // GIC blob must be non-empty for a saved-running VM.
263        if self.gic_state.bytes.is_empty() {
264            return Err(SnapshotError::Incompatible);
265        }
266        // GIC blob length redundancy must agree with the byte vector.
267        if usize::try_from(self.gic_state.len).unwrap_or(usize::MAX) != self.gic_state.bytes.len() {
268            return Err(SnapshotError::Incompatible);
269        }
270        Ok(())
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `value` with bitcode's serde adapter and decode the bytes back.
    fn round_trip<T: Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug>(
        value: &T,
    ) -> T {
        let bytes = bitcode::serialize(value).expect("encode");
        bitcode::deserialize(&bytes).expect("decode")
    }

    /// A minimal state that satisfies every invariant `verify_compatible` checks.
    ///
    /// The rejection tests override exactly one field of this state, so each of them
    /// fails for the single reason its name states.
    fn compatible_state() -> MicrovmState {
        MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 1,
                ..VmInfo::default()
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![1]),
            mmds_state: None,
        }
    }

    #[test]
    fn test_should_round_trip_default_microvm_state() {
        let state = MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 256,
                smt: false,
                cpu_template: String::new(),
                kernel_image_path: "/tmp/vmlinux".into(),
                initrd_path: None,
                boot_args: "console=ttyAMA0 panic=1".into(),
                track_dirty_pages: false,
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![1, 2, 3, 4]),
            mmds_state: None,
        };
        let back = round_trip(&state);
        assert_eq!(state, back);
    }

    #[test]
    fn test_should_sort_device_states_by_slot_then_id() {
        let states = DeviceStates::from_devices(vec![
            DeviceState {
                kind: "virtio-net".into(),
                id: "eth0".into(),
                mmio_slot: 2,
                blob: vec![],
            },
            DeviceState {
                kind: "virtio-block".into(),
                id: "rootfs".into(),
                mmio_slot: 0,
                blob: vec![],
            },
            DeviceState {
                kind: "virtio-block".into(),
                id: "data".into(),
                mmio_slot: 1,
                blob: vec![],
            },
        ]);
        assert_eq!(states.devices[0].id, "rootfs");
        assert_eq!(states.devices[1].id, "data");
        assert_eq!(states.devices[2].id, "eth0");
    }

    #[test]
    fn test_should_break_slot_ties_by_id() {
        // Two entries share slot 1, so only the `id` half of the sort key can order
        // them; the slot-0 entry must still come first despite its larger id.
        let states = DeviceStates::from_devices(vec![
            DeviceState {
                kind: "virtio-block".into(),
                id: "b".into(),
                mmio_slot: 1,
                blob: vec![],
            },
            DeviceState {
                kind: "virtio-block".into(),
                id: "a".into(),
                mmio_slot: 1,
                blob: vec![],
            },
            DeviceState {
                kind: "virtio-net".into(),
                id: "z".into(),
                mmio_slot: 0,
                blob: vec![],
            },
        ]);
        let ids: Vec<&str> = states.devices.iter().map(|d| d.id.as_str()).collect();
        assert_eq!(ids, ["z", "a", "b"]);
    }

    #[test]
    fn test_should_accept_compatible_state() {
        // Guards against `verify_compatible` rejecting everything: without this the
        // rejection tests below would pass even if the `Ok` path were unreachable.
        assert!(compatible_state().verify_compatible().is_ok());
    }

    #[test]
    fn test_should_accept_max_vcpu_count() {
        let state = MicrovmState {
            vcpu_states: (0..32).map(VcpuState::new).collect(),
            ..compatible_state()
        };
        assert!(state.verify_compatible().is_ok());
    }

    #[test]
    fn test_should_reject_too_many_vcpu_states() {
        let state = MicrovmState {
            vcpu_states: (0..33).map(VcpuState::new).collect(),
            ..compatible_state()
        };
        assert!(matches!(
            state.verify_compatible(),
            Err(SnapshotError::Incompatible)
        ));
    }

    #[test]
    fn test_should_reject_zero_vcpu_state() {
        let state = MicrovmState {
            vcpu_states: vec![],
            ..compatible_state()
        };
        assert!(matches!(
            state.verify_compatible(),
            Err(SnapshotError::Incompatible)
        ));
    }

    #[test]
    fn test_should_reject_smt_enabled() {
        let mut state = compatible_state();
        state.vm_info.smt = true;
        assert!(matches!(
            state.verify_compatible(),
            Err(SnapshotError::Incompatible)
        ));
    }

    #[test]
    fn test_should_reject_empty_gic_blob() {
        let state = MicrovmState {
            gic_state: GicState::default(),
            ..compatible_state()
        };
        assert!(matches!(
            state.verify_compatible(),
            Err(SnapshotError::Incompatible)
        ));
    }

    #[test]
    fn test_should_reject_gic_length_mismatch() {
        let mut state = MicrovmState {
            gic_state: GicState::from_bytes(vec![1, 2, 3]),
            ..compatible_state()
        };
        state.gic_state.len = 99;
        assert!(matches!(
            state.verify_compatible(),
            Err(SnapshotError::Incompatible)
        ));
    }

    #[test]
    fn test_should_round_trip_populated_mmds_state() {
        // Regression: an earlier draft stored `data` as `serde_json::Value`,
        // which bitcode cannot decode (requires `deserialize_any`). The smoke
        // test caught this when the demo state file refused to round-trip.
        // The fix stores MMDS as a JSON string; this test pins it.
        let mmds = MmdsState::with_data(
            &serde_json::json!({"latest": {"meta-data": {"instance-id": "demo"}}}),
            Some(3600),
        )
        .unwrap();
        let state = MicrovmState {
            vm_info: VmInfo {
                mem_size_mib: 64,
                smt: false,
                cpu_template: String::new(),
                kernel_image_path: "/k".into(),
                initrd_path: None,
                boot_args: String::new(),
                track_dirty_pages: false,
            },
            vcpu_states: vec![VcpuState::new(0)],
            device_states: DeviceStates::default(),
            gic_state: GicState::from_bytes(vec![0xAA; 16]),
            mmds_state: Some(mmds),
        };
        let back = round_trip(&state);
        let restored = back.mmds_state.expect("MMDS round-trip dropped");
        assert_eq!(restored.token_ttl_seconds, Some(3600));
        let value = restored.data_value().unwrap();
        assert_eq!(value["latest"]["meta-data"]["instance-id"], "demo");
    }

    #[test]
    fn test_should_decode_empty_mmds_data_as_null() {
        // `MmdsState::default()` leaves `data_json` empty, which is not valid JSON;
        // `data_value` must map it to `Null` instead of surfacing a parse error.
        let value = MmdsState::default().data_value().unwrap();
        assert_eq!(value, serde_json::Value::Null);
    }

    #[test]
    fn test_should_round_trip_psci_state() {
        for s in [
            PsciVcpuState::On,
            PsciVcpuState::Off,
            PsciVcpuState::OnPending,
        ] {
            let back = round_trip(&s);
            assert_eq!(back, s);
        }
    }
}