#![cfg(all(target_os = "macos", target_arch = "aarch64"))]
use std::io::{Read, Write};
use crate::devices::virtio::vsock::muxer::TsiListenerSnapshot;
pub use crate::hvf::vcpu_snapshot::PerVcpuState;
use crate::hypervisor::{BackendError, HypervisorVcpu, HypervisorVm};
use crate::vmm::vstate::MicroVm;
/// Eight-byte magic written at the very start of a snapshot file and checked on load.
const SNAPSHOT_MAGIC: [u8; 8] = *b"SMSNAP\x0a\x00";
/// Format version stored right after the magic; loading rejects any other value.
const SNAPSHOT_VERSION: u64 = 11;
/// Alignment, in bytes, of the RAM region's starting offset within a snapshot file.
const RAM_PAGE_ALIGN: u64 = 16384;
/// Granularity, in bytes, at which the sparse RAM writers test memory for all-zero content.
const SPARSE_RAM_CHUNK: usize = 64 * 1024;
/// Device-side state carried alongside the CPU/RAM snapshot.
///
/// Both vectors are cloned verbatim into the on-disk container metadata.
#[derive(Default)]
pub struct VirtioSnapshot {
/// Serialized per-device records.
pub devices: Vec<crate::snapshot_frame::DeviceRecord>,
/// Snapshots of the vsock muxer's TSI listeners.
pub vsock_listeners: Vec<TsiListenerSnapshot>,
}
/// Full in-memory snapshot of a VM: clock references, guest RAM, interrupt
/// controller state, per-vCPU state and virtio state.
pub struct Snapshot {
/// Host monotonic tick count sampled at capture time.
pub captured_host_ticks: u64,
/// Clock reference derived from the boot vCPU state and `captured_host_ticks`.
pub captured_clock_ref: u64,
/// Guest-physical address of the RAM region.
pub ram_gpa: u64,
/// Copy of guest RAM. Empty when only metadata was loaded; restore skips the
/// RAM copy in that case.
pub memory: Vec<u8>,
/// Opaque interrupt-controller state blob.
pub intc_blob: Vec<u8>,
/// Per-vCPU state; the first entry is the boot vCPU.
pub per_vcpu: Vec<PerVcpuState>,
/// Device-side state.
pub virtio: VirtioSnapshot,
}
/// Byte accounting reported by the snapshot writers.
pub struct SnapshotWriteStats {
/// Logical size of the RAM region.
pub ram_bytes: u64,
/// Bytes the writer counted as data.
pub ram_data_bytes: u64,
/// Bytes the writer counted as zero (left as holes rather than written).
pub ram_zero_bytes: u64,
}
/// Per-phase wall-clock timings (microseconds) collected while restoring a snapshot.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct SnapshotRestoreTimings {
/// Time spent copying RAM back into the guest; 0 when the copy was skipped.
pub ram_copy_us: u128,
/// Time spent restoring the interrupt controller; 0 when skipped.
pub gic_restore_us: u128,
/// Time spent restoring the boot vCPU state.
pub vcpu_restore_us: u128,
/// Time spent in the clock restore call.
pub vtimer_offset_us: u128,
/// Offset value returned by the clock restore call.
pub applied_vtimer_offset: u64,
}
/// Knobs for `restore_snapshot_timed_with_options`; the default restores everything.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct SnapshotRestoreOptions {
/// When true, the interrupt-controller blob is not restored.
pub skip_intc_blob: bool,
}
/// Captures a full in-memory [`Snapshot`] of `vm`.
///
/// The boot vCPU state and interrupt-controller blob are captured first, then
/// guest RAM is copied into a fresh buffer, and finally the host tick counter
/// is sampled and turned into a clock reference. `virtio` is stored as given.
///
/// # Errors
/// Propagates any error from the vCPU or interrupt-controller capture calls.
pub fn capture_snapshot(
    vm: &MicroVm,
    virtio: VirtioSnapshot,
) -> crate::hypervisor::ActiveResult<Snapshot> {
    let boot_state = vm.vcpu.capture_snapshot()?;
    let intc_blob = vm.vm.capture_intc()?;

    let ram_len = vm.ram_size;
    let mut memory = vec![0u8; ram_len];
    // SAFETY: `memory` owns exactly `ram_len` writable bytes. This relies on
    // `vm.ram_host` pointing at `vm.ram_size` readable bytes, which is an
    // invariant of `MicroVm` not visible from here.
    unsafe {
        std::ptr::copy_nonoverlapping(vm.ram_host, memory.as_mut_ptr(), ram_len);
    }

    let captured_host_ticks = crate::hypervisor::ActiveVm::host_monotonic_ticks();
    let captured_clock_ref =
        <crate::hypervisor::ActiveVcpu as crate::hypervisor::HypervisorVcpu>::capture_clock_ref(
            &boot_state,
            captured_host_ticks,
        );

    let per_vcpu = vec![boot_state];
    let ram_gpa = vm.ram_gpa;
    Ok(Snapshot {
        captured_host_ticks,
        captured_clock_ref,
        ram_gpa,
        memory,
        intc_blob,
        per_vcpu,
        virtio,
    })
}
/// Restores `snap` into `vm` with default options, discarding the timing report.
pub fn restore_snapshot(vm: &MicroVm, snap: &Snapshot) -> crate::hypervisor::ActiveResult<()> {
    match restore_snapshot_timed(vm, snap) {
        Ok(_timings) => Ok(()),
        Err(e) => Err(e),
    }
}
/// Restores `snap` into `vm` with default [`SnapshotRestoreOptions`] and
/// returns the per-phase timings.
pub fn restore_snapshot_timed(
    vm: &MicroVm,
    snap: &Snapshot,
) -> crate::hypervisor::ActiveResult<SnapshotRestoreTimings> {
    let default_options = SnapshotRestoreOptions::default();
    restore_snapshot_timed_with_options(vm, snap, default_options)
}
pub fn restore_snapshot_timed_with_options(
vm: &MicroVm,
snap: &Snapshot,
options: SnapshotRestoreOptions,
) -> crate::hypervisor::ActiveResult<SnapshotRestoreTimings> {
let mut timings = SnapshotRestoreTimings::default();
if !snap.memory.is_empty() {
let t0 = std::time::Instant::now();
unsafe {
std::ptr::copy_nonoverlapping(
snap.memory.as_ptr(),
vm.ram_host,
vm.ram_size.min(snap.memory.len()),
);
}
timings.ram_copy_us = t0.elapsed().as_micros();
}
if !options.skip_intc_blob {
let t0 = std::time::Instant::now();
vm.vm.restore_intc(&snap.intc_blob)?;
timings.gic_restore_us = t0.elapsed().as_micros();
}
let boot_vcpu = snap
.per_vcpu
.first()
.ok_or(crate::hypervisor::ActiveError::other(
"vmm snapshot internal error",
))?;
let t0 = std::time::Instant::now();
vm.vcpu.restore_snapshot(boot_vcpu)?;
timings.vcpu_restore_us = t0.elapsed().as_micros();
let now = crate::hypervisor::ActiveVm::host_monotonic_ticks();
let t0 = std::time::Instant::now();
let new_offset = vm.vcpu.restore_clock(snap.captured_clock_ref, now)?;
timings.vtimer_offset_us = t0.elapsed().as_micros();
timings.applied_vtimer_offset = new_offset;
Ok(timings)
}
/// Errors produced while reading or writing snapshot files and sidecars.
#[derive(Debug)]
pub enum FileError {
/// Underlying I/O failure.
Io(std::io::Error),
/// The file does not start with the expected magic bytes.
BadMagic,
/// The file carries an unsupported format version (the value found).
BadVersion(u64),
/// The file is structurally invalid; the message says how.
Malformed(&'static str),
/// The file ended, or a field could not be decoded, before parsing completed.
Truncated,
}
impl From<std::io::Error> for FileError {
fn from(e: std::io::Error) -> Self {
Self::Io(e)
}
}
/// Decodes a little-endian `u16` via `snapshot_frame`, mapping failure to
/// [`FileError::Truncated`].
fn le_u16(bytes: &[u8]) -> Result<u16, FileError> {
    match crate::snapshot_frame::le_u16(bytes) {
        Some(value) => Ok(value),
        None => Err(FileError::Truncated),
    }
}
/// Decodes a little-endian `u32` via `snapshot_frame`, mapping failure to
/// [`FileError::Truncated`].
fn le_u32(bytes: &[u8]) -> Result<u32, FileError> {
    match crate::snapshot_frame::le_u32(bytes) {
        Some(value) => Ok(value),
        None => Err(FileError::Truncated),
    }
}
/// Decodes a little-endian `u64` via `snapshot_frame`, mapping failure to
/// [`FileError::Truncated`].
fn le_u64(bytes: &[u8]) -> Result<u64, FileError> {
    match crate::snapshot_frame::le_u64(bytes) {
        Some(value) => Ok(value),
        None => Err(FileError::Truncated),
    }
}
/// Size in bytes of the fixed-length buffer `read_tsi_listener_record` decodes from.
const TSI_LISTENER_RECORD_LEN: usize = 24;
/// Appends the serialized form of `l` to `buf`.
///
/// # Panics
/// Panics if the listener's `write_to` reports an error; the message states
/// that writes into a `Vec<u8>` are expected to be infallible.
fn write_tsi_listener_record(buf: &mut Vec<u8>, l: &TsiListenerSnapshot) {
    let outcome = l.write_to(buf);
    outcome.expect("Vec<u8> write is infallible");
}
/// Decodes one TSI listener from a fixed-size record buffer.
///
/// # Errors
/// Any decode failure is reported as [`FileError::Truncated`].
fn read_tsi_listener_record(
    e: &[u8; TSI_LISTENER_RECORD_LEN],
) -> Result<TsiListenerSnapshot, FileError> {
    let mut cursor: &[u8] = &e[..];
    match TsiListenerSnapshot::read_from(&mut cursor) {
        Ok(listener) => Ok(listener),
        Err(_) => Err(FileError::Truncated),
    }
}
/// Writes `snap` to `path`, discarding the write statistics.
pub fn save_to_file(path: &str, snap: &Snapshot) -> Result<(), FileError> {
    match save_to_file_with_stats(path, snap) {
        Ok(_stats) => Ok(()),
        Err(e) => Err(e),
    }
}
/// Returns the posix-fs sidecar path for a snapshot: `snap_path` with
/// `.posixfs` appended.
pub fn posix_fs_sidecar_path(snap_path: &str) -> String {
    [snap_path, ".posixfs"].concat()
}
/// Captures the state of every posix-fs mount and writes it to the snapshot's
/// posix-fs sidecar.
///
/// Does nothing when `posix_fs` is empty. A write failure is not fatal: it is
/// reported as a warning on stderr and the function returns normally.
pub fn capture_and_write_posix_fs_sidecar(
    snap_path: &str,
    posix_fs: &[std::sync::Arc<crate::fuse::PosixFs>],
) {
    if posix_fs.is_empty() {
        return;
    }
    let mut blobs: Vec<Option<Vec<u8>>> = Vec::with_capacity(posix_fs.len());
    for mount in posix_fs {
        blobs.push(Some(mount.snapshot_state()));
    }
    let sidecar_path = posix_fs_sidecar_path(snap_path);
    match write_posix_fs_sidecar(&sidecar_path, &blobs) {
        Ok(()) => {}
        Err(e) => {
            eprintln!(
                "supermachine: warning: posix-fs sidecar write to {sidecar_path} failed ({e:?}); \
                 warm restores from this snapshot will fall back to lazy LOOKUP and may EAI \
                 on paths not walked during warmup"
            );
        }
    }
}
/// Serializes per-mount posix-fs state blobs to `sidecar_path`.
///
/// Layout (all integers little-endian): magic `PFXS`, `u32` version (1),
/// `u32` entry count, then for each entry a `u32` length followed by that many
/// bytes. A `None` entry is written as length 0.
///
/// # Errors
/// * [`FileError::Malformed`] if the entry count or any blob length does not
///   fit in a `u32`. Previously these were truncated silently, which would
///   produce an undecodable sidecar.
/// * [`FileError::Io`] if creating, writing or syncing the file fails.
pub fn write_posix_fs_sidecar(
    sidecar_path: &str,
    blobs: &[Option<Vec<u8>>],
) -> Result<(), FileError> {
    use std::io::Write;

    const LIMIT: usize = u32::MAX as usize;
    if blobs.len() > LIMIT {
        return Err(FileError::Malformed(
            "posix-fs sidecar: entry count exceeds u32",
        ));
    }

    let payload_len: usize = blobs
        .iter()
        .map(|b| 4 + b.as_ref().map_or(0, |v| v.len()))
        .sum();
    // 12 = magic + version + entry count.
    let mut buf: Vec<u8> = Vec::with_capacity(12 + payload_len);
    buf.extend_from_slice(b"PFXS");
    buf.extend_from_slice(&1u32.to_le_bytes());
    buf.extend_from_slice(&(blobs.len() as u32).to_le_bytes());

    for blob in blobs {
        let bytes: &[u8] = blob.as_deref().unwrap_or(&[]);
        if bytes.len() > LIMIT {
            return Err(FileError::Malformed(
                "posix-fs sidecar: blob length exceeds u32",
            ));
        }
        buf.extend_from_slice(&(bytes.len() as u32).to_le_bytes());
        buf.extend_from_slice(bytes);
    }

    // The whole image is built in memory first, so a validation error above
    // never leaves a partially written file behind.
    let mut f = std::fs::File::create(sidecar_path).map_err(FileError::Io)?;
    f.write_all(&buf).map_err(FileError::Io)?;
    f.sync_all().map_err(FileError::Io)?;
    Ok(())
}
/// Returns the DAX sidecar path for a snapshot: `snap_path` with
/// `.dax_sidecar` appended.
pub fn dax_sidecar_path(snap_path: &str) -> String {
    let mut out = String::from(snap_path);
    out.push_str(".dax_sidecar");
    out
}
/// Returns the smpark sidecar path for a snapshot: `snap_path` with
/// `.smpark` appended.
pub fn smpark_sidecar_path(snap_path: &str) -> String {
    let mut out = String::from(snap_path);
    out.push_str(".smpark");
    out
}
/// Writes the smpark sidecar for `snap_path`.
///
/// The file is 16 bytes: magic `SMPK`, little-endian `u32` version (1), and
/// the little-endian `u64` `gpa`. The file is synced before returning.
///
/// # Errors
/// [`FileError::Io`] if creating, writing or syncing the file fails.
pub fn write_smpark_sidecar(snap_path: &str, gpa: u64) -> Result<(), FileError> {
    use std::io::Write;
    let mut record = [0u8; 16];
    record[..4].copy_from_slice(b"SMPK");
    record[4..8].copy_from_slice(&1u32.to_le_bytes());
    record[8..].copy_from_slice(&gpa.to_le_bytes());

    let target = smpark_sidecar_path(snap_path);
    let mut file = std::fs::File::create(target).map_err(FileError::Io)?;
    file.write_all(&record).map_err(FileError::Io)?;
    file.sync_all().map_err(FileError::Io)?;
    Ok(())
}
/// Writes the smpark sidecar for `snap_path` unless `gpa` is 0.
///
/// A write failure is not fatal: it is reported as a warning on stderr and
/// the function returns normally.
pub fn capture_and_write_smpark_sidecar(gpa: u64, snap_path: &str) {
    if gpa != 0 {
        match write_smpark_sidecar(snap_path, gpa) {
            Ok(()) => {}
            Err(e) => {
                eprintln!(
                    "supermachine: warning: smpark sidecar write to {} failed ({e:?}); \
                     multi-vCPU restores of this snapshot fall back to the agent-RPC \
                     unpark path instead of host-direct",
                    smpark_sidecar_path(snap_path)
                );
            }
        }
    }
}
/// Reads the guest-physical address stored in the smpark sidecar of `snap_path`.
///
/// Returns `None` if the sidecar cannot be opened or read, is shorter than
/// 16 bytes, does not start with `SMPK`, or has a version other than 1.
pub fn read_smpark_sidecar(snap_path: &str) -> Option<u64> {
    use std::io::Read;
    let mut raw = Vec::new();
    std::fs::File::open(smpark_sidecar_path(snap_path))
        .ok()?
        .read_to_end(&mut raw)
        .ok()?;

    // Only the first 16 bytes are meaningful; trailing bytes are ignored.
    let header = raw.get(..16)?;
    if header[..4] != *b"SMPK" {
        return None;
    }
    let version = u32::from_le_bytes([header[4], header[5], header[6], header[7]]);
    if version != 1 {
        return None;
    }
    let mut gpa = [0u8; 8];
    gpa.copy_from_slice(&header[8..16]);
    Some(u64::from_le_bytes(gpa))
}
/// Serializes per-session DAX state blobs to `sidecar_path`.
///
/// Layout (all integers little-endian): magic `DAXC`, `u32` version (1),
/// `u32` entry count, then for each entry a `u32` length followed by that many
/// bytes. A `None` entry is written as length 0.
///
/// # Errors
/// * [`FileError::Malformed`] if the entry count or any blob length does not
///   fit in a `u32`. Previously these were truncated silently, which would
///   produce an undecodable sidecar.
/// * [`FileError::Io`] if creating, writing or syncing the file fails.
pub fn write_dax_sidecar(sidecar_path: &str, blobs: &[Option<Vec<u8>>]) -> Result<(), FileError> {
    use std::io::Write;

    const LIMIT: usize = u32::MAX as usize;
    if blobs.len() > LIMIT {
        return Err(FileError::Malformed("dax sidecar: entry count exceeds u32"));
    }

    let payload_len: usize = blobs
        .iter()
        .map(|b| 4 + b.as_ref().map_or(0, |v| v.len()))
        .sum();
    // 12 = magic + version + entry count.
    let mut buf: Vec<u8> = Vec::with_capacity(12 + payload_len);
    buf.extend_from_slice(b"DAXC");
    buf.extend_from_slice(&1u32.to_le_bytes());
    buf.extend_from_slice(&(blobs.len() as u32).to_le_bytes());

    for blob in blobs {
        let bytes: &[u8] = blob.as_deref().unwrap_or(&[]);
        if bytes.len() > LIMIT {
            return Err(FileError::Malformed("dax sidecar: blob length exceeds u32"));
        }
        buf.extend_from_slice(&(bytes.len() as u32).to_le_bytes());
        buf.extend_from_slice(bytes);
    }

    // The whole image is built in memory first, so a validation error above
    // never leaves a partially written file behind.
    let mut f = std::fs::File::create(sidecar_path).map_err(FileError::Io)?;
    f.write_all(&buf).map_err(FileError::Io)?;
    f.sync_all().map_err(FileError::Io)?;
    Ok(())
}
/// Reads the DAX sidecar at `sidecar_path`.
///
/// Returns `Ok(None)` when the file does not exist, otherwise one byte vector
/// per recorded session (zero-length entries come back as empty vectors).
/// Bytes after the last declared entry are ignored.
///
/// # Errors
/// * [`FileError::Io`] for open/read failures other than "not found".
/// * [`FileError::Truncated`] if the header or any entry is cut short.
/// * [`FileError::BadMagic`] / [`FileError::BadVersion`] for a wrong header.
pub fn read_dax_sidecar(sidecar_path: &str) -> Result<Option<Vec<Vec<u8>>>, FileError> {
    use std::io::Read;
    let mut buf = Vec::new();
    match std::fs::File::open(sidecar_path) {
        Ok(mut file) => {
            file.read_to_end(&mut buf).map_err(FileError::Io)?;
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(FileError::Io(e)),
    }

    if buf.len() < 12 {
        return Err(FileError::Truncated);
    }
    let (header, mut rest) = buf.split_at(12);
    let word = |at: usize| {
        u32::from_le_bytes([header[at], header[at + 1], header[at + 2], header[at + 3]])
    };
    if header[..4] != *b"DAXC" {
        return Err(FileError::BadMagic);
    }
    let version = word(4);
    if version != 1 {
        return Err(FileError::BadVersion(version as u64));
    }
    let session_count = word(8) as usize;

    // Bound the preallocation by what the file could possibly hold so a bogus
    // count cannot trigger a huge allocation.
    let mut out = Vec::with_capacity(session_count.min(rest.len() / 4));
    for _ in 0..session_count {
        if rest.len() < 4 {
            return Err(FileError::Truncated);
        }
        let (len_bytes, after_len) = rest.split_at(4);
        let len =
            u32::from_le_bytes([len_bytes[0], len_bytes[1], len_bytes[2], len_bytes[3]]) as usize;
        if after_len.len() < len {
            return Err(FileError::Truncated);
        }
        let (blob, remainder) = after_len.split_at(len);
        out.push(blob.to_vec());
        rest = remainder;
    }
    Ok(Some(out))
}
/// Captures the state of every DAX session and writes it to the snapshot's
/// DAX sidecar.
///
/// Does nothing when `dax_sessions` is empty. A write failure is not fatal: it
/// is reported as a warning on stderr and the function returns normally.
pub fn capture_and_write_dax_sidecar(
    snap_path: &str,
    dax_sessions: &[std::sync::Arc<crate::fuse::DaxSession>],
) {
    if dax_sessions.is_empty() {
        return;
    }
    let mut blobs: Vec<Option<Vec<u8>>> = Vec::with_capacity(dax_sessions.len());
    for session in dax_sessions {
        blobs.push(Some(session.snapshot_state()));
    }
    let sidecar_path = dax_sidecar_path(snap_path);
    match write_dax_sidecar(&sidecar_path, &blobs) {
        Ok(()) => {}
        Err(e) => {
            eprintln!(
                "supermachine: warning: dax sidecar write to {sidecar_path} failed ({e:?}); \
                 cycle-restored guests will SIGBUS on first touch of DAX pages that weren't \
                 paged-in pre-snapshot"
            );
        }
    }
}
/// Reads the posix-fs sidecar at `sidecar_path`.
///
/// Returns `Ok(None)` when the file does not exist, otherwise one byte vector
/// per recorded mount (zero-length entries come back as empty vectors).
/// Bytes after the last declared entry are ignored.
///
/// # Errors
/// * [`FileError::Io`] for open/read failures other than "not found".
/// * [`FileError::Truncated`] if the header or any entry is cut short.
/// * [`FileError::BadMagic`] / [`FileError::BadVersion`] for a wrong header.
pub fn read_posix_fs_sidecar(sidecar_path: &str) -> Result<Option<Vec<Vec<u8>>>, FileError> {
    use std::io::Read;
    let mut buf = Vec::new();
    match std::fs::File::open(sidecar_path) {
        Ok(mut file) => {
            file.read_to_end(&mut buf).map_err(FileError::Io)?;
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(FileError::Io(e)),
    }

    if buf.len() < 12 {
        return Err(FileError::Truncated);
    }
    let (header, mut rest) = buf.split_at(12);
    let word = |at: usize| {
        u32::from_le_bytes([header[at], header[at + 1], header[at + 2], header[at + 3]])
    };
    if header[..4] != *b"PFXS" {
        return Err(FileError::BadMagic);
    }
    let version = word(4);
    if version != 1 {
        return Err(FileError::BadVersion(version as u64));
    }
    let mount_count = word(8) as usize;

    // Bound the preallocation by what the file could possibly hold so a bogus
    // count cannot trigger a huge allocation.
    let mut out = Vec::with_capacity(mount_count.min(rest.len() / 4));
    for _ in 0..mount_count {
        if rest.len() < 4 {
            return Err(FileError::Truncated);
        }
        let (len_bytes, after_len) = rest.split_at(4);
        let len =
            u32::from_le_bytes([len_bytes[0], len_bytes[1], len_bytes[2], len_bytes[3]]) as usize;
        if after_len.len() < len {
            return Err(FileError::Truncated);
        }
        let (blob, remainder) = after_len.split_at(len);
        out.push(blob.to_vec());
        rest = remainder;
    }
    Ok(Some(out))
}
/// Snapshot that keeps only the non-zero 4096-byte pages of guest RAM instead
/// of a full RAM copy.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub struct CompactSnapshot {
/// Host monotonic tick count sampled at capture time.
pub captured_host_ticks: u64,
/// Clock reference derived from the boot vCPU state and `captured_host_ticks`.
pub captured_clock_ref: u64,
/// Guest-physical address of the RAM region.
pub ram_gpa: u64,
/// Logical size of guest RAM in bytes.
pub ram_size: usize,
/// Opaque interrupt-controller state blob.
pub intc_blob: Vec<u8>,
/// Per-vCPU state; the boot vCPU comes first.
pub per_vcpu: Vec<PerVcpuState>,
/// Device-side state.
pub virtio: VirtioSnapshot,
/// Non-zero pages as `(byte offset into RAM, page contents)`.
pub pages: Vec<(usize, Box<[u8; 4096]>)>,
}
/// Page granularity, in bytes, used by the compact snapshot paths.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
const COMPACT_PAGE_SIZE: usize = 4096;
/// Returns true when every byte of `page` is zero, using NEON intrinsics.
///
/// `page` must be exactly `COMPACT_PAGE_SIZE` bytes long. That is checked only
/// by a `debug_assert`, and the loop reads `COMPACT_PAGE_SIZE` bytes from the
/// slice's start regardless of its actual length.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
#[inline]
fn page_is_zero(page: &[u8]) -> bool {
debug_assert_eq!(page.len(), COMPACT_PAGE_SIZE);
use std::arch::aarch64::{vld1q_u8, vmaxvq_u8, vorrq_u8};
// SAFETY: every load stays within `COMPACT_PAGE_SIZE` bytes of `page`'s
// start, which is in bounds as long as the caller honours the length
// requirement above.
unsafe {
let mut p = page.as_ptr();
let end = p.add(COMPACT_PAGE_SIZE);
// Four independent 16-byte accumulators, seeded with the first 64 bytes.
let mut a0 = vld1q_u8(p);
let mut a1 = vld1q_u8(p.add(16));
let mut a2 = vld1q_u8(p.add(32));
let mut a3 = vld1q_u8(p.add(48));
p = p.add(64);
// OR each following 64-byte group into the accumulators.
while p < end {
a0 = vorrq_u8(a0, vld1q_u8(p));
a1 = vorrq_u8(a1, vld1q_u8(p.add(16)));
a2 = vorrq_u8(a2, vld1q_u8(p.add(32)));
a3 = vorrq_u8(a3, vld1q_u8(p.add(48)));
p = p.add(64);
}
// The page is all-zero iff the OR of everything has no non-zero lane.
let merged = vorrq_u8(vorrq_u8(a0, a1), vorrq_u8(a2, a3));
vmaxvq_u8(merged) == 0
}
}
/// Scans the first `n_pages` pages of `memory` on the calling thread and
/// returns a copy of each non-zero page, tagged with its byte offset, in
/// ascending offset order.
///
/// # Panics
/// Panics if `memory` is shorter than `n_pages * COMPACT_PAGE_SIZE`.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn capture_compact_pages_serial(memory: &[u8], n_pages: usize) -> Vec<(usize, Box<[u8; 4096]>)> {
    // Capacity heuristic: room for one page in twenty up front.
    let mut kept: Vec<(usize, Box<[u8; 4096]>)> = Vec::with_capacity(n_pages / 20);
    kept.extend((0..n_pages).filter_map(|idx| {
        let byte_off = idx * COMPACT_PAGE_SIZE;
        let src = &memory[byte_off..byte_off + COMPACT_PAGE_SIZE];
        if page_is_zero(src) {
            return None;
        }
        let mut owned = Box::new([0u8; 4096]);
        owned.copy_from_slice(src);
        Some((byte_off, owned))
    }));
    kept
}
/// Scans the first `n_pages` pages of `memory` using up to `n_threads` scoped
/// worker threads and returns a copy of each non-zero page, tagged with its
/// byte offset, in ascending offset order.
///
/// Pages are split into contiguous slabs, one per worker, and the per-slab
/// results are concatenated in slab order.
///
/// # Panics
/// Panics if `memory` is shorter than `n_pages * COMPACT_PAGE_SIZE`. A panic
/// inside a worker is re-raised on the calling thread. Previously it was
/// swallowed and that worker's pages were silently dropped, so the snapshot
/// would have restored them as zero.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn capture_compact_pages_parallel(
    memory: &[u8],
    n_pages: usize,
    n_threads: usize,
) -> Vec<(usize, Box<[u8; 4096]>)> {
    let workers = n_threads.max(1);
    let pages_per_slab = n_pages.div_ceil(workers);

    // Scoped threads may borrow `memory` directly; no raw-pointer round trip
    // is needed to share it.
    let slab_results: Vec<Vec<(usize, Box<[u8; 4096]>)>> = std::thread::scope(|s| {
        let mut handles = Vec::with_capacity(workers);
        for slab_idx in 0..workers {
            let slab_start = slab_idx * pages_per_slab;
            let slab_end = ((slab_idx + 1) * pages_per_slab).min(n_pages);
            if slab_start >= slab_end {
                continue;
            }
            handles.push(s.spawn(move || {
                let mut local: Vec<(usize, Box<[u8; 4096]>)> =
                    Vec::with_capacity((slab_end - slab_start) / 20);
                for page_idx in slab_start..slab_end {
                    let off = page_idx * COMPACT_PAGE_SIZE;
                    let chunk = &memory[off..off + COMPACT_PAGE_SIZE];
                    if !page_is_zero(chunk) {
                        let mut page = Box::new([0u8; 4096]);
                        page.copy_from_slice(chunk);
                        local.push((off, page));
                    }
                }
                local
            }));
        }
        handles
            .into_iter()
            .map(|h| match h.join() {
                Ok(slab) => slab,
                // Losing a slab would corrupt the snapshot; fail loudly.
                Err(payload) => std::panic::resume_unwind(payload),
            })
            .collect()
    });

    let total: usize = slab_results.iter().map(Vec::len).sum();
    let mut pages = Vec::with_capacity(total);
    for slab in slab_results {
        pages.extend(slab);
    }
    pages
}
/// Captures a [`CompactSnapshot`] of `vm`.
///
/// The boot vCPU state and interrupt-controller blob are captured first, the
/// host tick counter is sampled and turned into a clock reference, and then
/// guest RAM is scanned in place for non-zero pages (serially or in parallel
/// depending on `snapshot_write_threads()`). `secondary_states` are appended
/// after the boot vCPU state.
///
/// # Errors
/// [`SnapshotStreamError::Hvf`] if either hypervisor capture call fails.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn capture_compact(
    vm: &MicroVm,
    virtio: VirtioSnapshot,
    secondary_states: Vec<PerVcpuState>,
) -> Result<CompactSnapshot, SnapshotStreamError> {
    let boot_state = match vm.vcpu.capture_snapshot() {
        Ok(state) => state,
        Err(e) => return Err(SnapshotStreamError::Hvf(e)),
    };
    let intc_blob = match vm.vm.capture_intc() {
        Ok(blob) => blob,
        Err(e) => return Err(SnapshotStreamError::Hvf(e)),
    };

    let captured_host_ticks = crate::hypervisor::ActiveVm::host_monotonic_ticks();
    let captured_clock_ref =
        <crate::hypervisor::ActiveVcpu as crate::hypervisor::HypervisorVcpu>::capture_clock_ref(
            &boot_state,
            captured_host_ticks,
        );

    let mut per_vcpu = Vec::with_capacity(1 + secondary_states.len());
    per_vcpu.push(boot_state);
    per_vcpu.extend(secondary_states);

    let ram_size = vm.ram_size;
    // SAFETY: this relies on `vm.ram_host` pointing at `vm.ram_size` readable
    // bytes that stay mapped for the duration of the scan, which is an
    // invariant of `MicroVm` not visible from here.
    let memory: &[u8] = unsafe { std::slice::from_raw_parts(vm.ram_host, ram_size) };
    let n_pages = ram_size / COMPACT_PAGE_SIZE;

    let pages: Vec<(usize, Box<[u8; 4096]>)> = match snapshot_write_threads() {
        0 | 1 => capture_compact_pages_serial(memory, n_pages),
        n_threads => capture_compact_pages_parallel(memory, n_pages, n_threads),
    };

    let ram_gpa = vm.ram_gpa;
    Ok(CompactSnapshot {
        captured_host_ticks,
        captured_clock_ref,
        ram_gpa,
        ram_size,
        intc_blob,
        per_vcpu,
        virtio,
        pages,
    })
}
/// Size in bytes of the fixed header prefix: magic (8), version (8),
/// RAM guest-physical address (8), RAM file offset (8).
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
const HVF_HDR_PREFIX: usize = 32;
/// Byte position of the RAM file-offset field inside the header prefix; the
/// writers patch this field once the final RAM offset is known.
const RAM_OFFSET_FIELD_POS: u64 = 24;
/// Builds the container metadata record for a snapshot.
///
/// Each vCPU state is serialized into its own blob; the interrupt-controller
/// blob and virtio vectors are cloned. `com1` is zero-filled and `tsi_token`
/// is left unset.
///
/// NOTE(review): `num_cpus` is a truncating `as u8` cast of `per_vcpu.len()`;
/// more than 255 vCPUs would wrap. Confirm that callers cannot reach that.
///
/// # Panics
/// Panics if serializing a vCPU state into a `Vec<u8>` reports an error.
fn hvf_container_meta(
    captured_host_ticks: u64,
    captured_clock_ref: u64,
    memory_len: u64,
    intc_blob: &[u8],
    per_vcpu: &[PerVcpuState],
    virtio: &VirtioSnapshot,
) -> crate::snapshot_frame::ContainerMeta {
    let vcpu_blobs: Vec<Vec<u8>> = per_vcpu
        .iter()
        .map(|state| {
            // Size hint only; the blob grows as needed.
            let hint = 64 + state.gp_regs.len() * 12 + state.simd_regs.len() * 20;
            let mut blob = Vec::with_capacity(hint);
            <crate::hypervisor::ActiveVcpu as crate::hypervisor::HypervisorVcpu>::write_snapshot_state(
                state, &mut blob,
            )
            .expect("Vec<u8> write is infallible");
            blob
        })
        .collect();

    crate::snapshot_frame::ContainerMeta {
        num_cpus: per_vcpu.len() as u8,
        mem_size: memory_len,
        com1: [0u8; 6],
        clock_host_ticks: captured_host_ticks,
        clock_ref: captured_clock_ref,
        intc_blob: intc_blob.to_vec(),
        vcpu_blobs,
        devices: virtio.devices.clone(),
        tsi_token: None,
        vsock_listeners: virtio.vsock_listeners.clone(),
    }
}
/// Encodes the snapshot header: magic, then version, `ram_gpa` and
/// `ram_offset` as little-endian `u64`s, followed by the serialized
/// container metadata.
///
/// # Panics
/// Panics if serializing the container into a `Vec<u8>` reports an error.
fn encode_hvf_meta(
    ram_gpa: u64,
    ram_offset: u64,
    cm: &crate::snapshot_frame::ContainerMeta,
) -> Vec<u8> {
    let mut out = Vec::with_capacity(HVF_HDR_PREFIX + 512);
    out.extend_from_slice(&SNAPSHOT_MAGIC);
    for field in [SNAPSHOT_VERSION, ram_gpa, ram_offset] {
        out.extend_from_slice(&field.to_le_bytes());
    }
    let written = cm.write_container(&mut out);
    written.expect("Vec<u8> write is infallible");
    out
}
/// Encodes the header and container metadata for a [`CompactSnapshot`],
/// recording `ram_offset_for_header` as the RAM file offset.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn encode_compact_meta(snap: &CompactSnapshot, ram_offset_for_header: u64) -> Vec<u8> {
    let ram_len = snap.ram_size as u64;
    let container = hvf_container_meta(
        snap.captured_host_ticks,
        snap.captured_clock_ref,
        ram_len,
        &snap.intc_blob,
        &snap.per_vcpu,
        &snap.virtio,
    );
    encode_hvf_meta(snap.ram_gpa, ram_offset_for_header, &container)
}
/// Writes a [`CompactSnapshot`] to `path`.
///
/// The data goes to `{path}.partial` first: header and metadata, zero padding
/// up to the next `RAM_PAGE_ALIGN` boundary, then the RAM region sized with
/// `set_len` and filled only where non-zero pages exist. The RAM offset field
/// in the header is patched last and the file is renamed onto `path`.
///
/// # Errors
/// Any I/O failure is returned as [`FileError`]; the partial file is not
/// removed in that case.
pub fn save_compact_to_file(
    snap: &CompactSnapshot,
    path: &str,
) -> Result<SnapshotWriteStats, FileError> {
    use std::io::{Seek, SeekFrom, Write};
    let partial = format!("{path}.partial");
    let mut out = std::fs::File::create(&partial)?;

    // Header is written with a placeholder RAM offset of 0.
    let header = encode_compact_meta(snap, 0);
    out.write_all(&header)?;

    let header_end = out.stream_position()?;
    let misalign = header_end % RAM_PAGE_ALIGN;
    if misalign != 0 {
        let pad = (RAM_PAGE_ALIGN - misalign) as usize;
        out.write_all(&vec![0u8; pad])?;
    }

    let ram_offset = out.stream_position()?;
    let ram_len = snap.ram_size as u64;
    out.set_len(ram_offset + ram_len)?;

    let n_threads = snapshot_write_threads();
    let serial = n_threads <= 1 || snap.pages.len() < 64;
    let data_bytes = if serial {
        let mut written = 0u64;
        for (page_off, page) in snap.pages.iter() {
            out.seek(SeekFrom::Start(ram_offset + *page_off as u64))?;
            out.write_all(&page[..])?;
            written += COMPACT_PAGE_SIZE as u64;
        }
        written
    } else {
        save_compact_pages_parallel(&out, &snap.pages, ram_offset, n_threads)?
    };

    // Patch the real RAM offset into the header now that it is known.
    out.seek(SeekFrom::Start(RAM_OFFSET_FIELD_POS))?;
    out.write_all(&ram_offset.to_le_bytes())?;
    drop(out);
    std::fs::rename(&partial, path)?;

    Ok(SnapshotWriteStats {
        ram_bytes: ram_len,
        ram_data_bytes: data_bytes,
        ram_zero_bytes: ram_len - data_bytes,
    })
}
/// Writes `pages` into the RAM region of `f` using up to `n_threads` scoped
/// worker threads and positional writes.
///
/// `pages` is split into contiguous chunks, one per worker. Inside a chunk,
/// runs of pages whose offsets are exactly `COMPACT_PAGE_SIZE` apart are
/// coalesced into a single write. Each page is written at
/// `ram_offset + page offset`.
///
/// Returns the number of bytes written. An empty `pages` slice returns
/// `Ok(0)`; previously it panicked because `chunks` was asked for a chunk
/// size of zero.
///
/// # Errors
/// Returns an I/O error if duplicating the file handle or any write fails, or
/// if a worker thread panics.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn save_compact_pages_parallel(
    f: &std::fs::File,
    pages: &[(usize, Box<[u8; 4096]>)],
    ram_offset: u64,
    n_threads: usize,
) -> Result<u64, FileError> {
    use std::os::unix::fs::FileExt;
    if pages.is_empty() {
        return Ok(0);
    }
    let workers = n_threads.max(1);
    // Non-zero because `pages` is non-empty, so `chunks` below cannot panic.
    let per_thread = pages.len().div_ceil(workers);

    let total_data: u64 = std::thread::scope(|s| -> Result<u64, std::io::Error> {
        let mut handles = Vec::with_capacity(workers);
        for chunk in pages.chunks(per_thread) {
            let f_clone = f.try_clone()?;
            let h = s.spawn(move || -> std::io::Result<u64> {
                let mut bytes = 0u64;
                let mut i = 0usize;
                while i < chunk.len() {
                    // Extend the run while offsets stay contiguous.
                    let mut j = i + 1;
                    while j < chunk.len() && chunk[j].0 == chunk[j - 1].0 + COMPACT_PAGE_SIZE {
                        j += 1;
                    }
                    let run = &chunk[i..j];
                    let file_off = ram_offset + run[0].0 as u64;
                    if run.len() == 1 {
                        // Single page: write straight from the page buffer.
                        f_clone.write_all_at(&run[0].1[..], file_off)?;
                    } else {
                        // Several contiguous pages: gather, then one write.
                        let mut buf: Vec<u8> = Vec::with_capacity(run.len() * COMPACT_PAGE_SIZE);
                        for (_, page) in run {
                            buf.extend_from_slice(&page[..]);
                        }
                        f_clone.write_all_at(&buf, file_off)?;
                    }
                    bytes += (run.len() * COMPACT_PAGE_SIZE) as u64;
                    i = j;
                }
                Ok(bytes)
            });
            handles.push(h);
        }
        let mut total = 0u64;
        for h in handles {
            total += h
                .join()
                .map_err(|_| std::io::Error::other("snapshot write thread panicked"))??;
        }
        Ok(total)
    })?;
    Ok(total_data)
}
/// Clones `src` to `dst` by calling `libc::clonefile` with flags 0.
///
/// # Errors
/// `InvalidInput` if either path contains a NUL byte; otherwise the OS error
/// reported when `clonefile` returns non-zero.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn clonefile_via_libc(src: &str, dst: &str) -> std::io::Result<()> {
    fn to_c_path(path: &str, nul_msg: &'static str) -> std::io::Result<std::ffi::CString> {
        match std::ffi::CString::new(path) {
            Ok(c) => Ok(c),
            Err(_) => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                nul_msg,
            )),
        }
    }

    let src_c = to_c_path(src, "src path contains NUL byte")?;
    let dst_c = to_c_path(dst, "dst path contains NUL byte")?;
    // SAFETY: both pointers come from live `CString`s and are NUL-terminated.
    let rc = unsafe { libc::clonefile(src_c.as_ptr(), dst_c.as_ptr(), 0) };
    match rc {
        0 => Ok(()),
        _ => Err(std::io::Error::last_os_error()),
    }
}
/// Loads a snapshot file as a [`CompactSnapshot`].
///
/// Metadata is read with `load_meta`. The RAM region is then mapped read-only
/// and scanned for non-zero pages with `capture_compact_pages_serial`, the
/// same routine the capture path uses.
///
/// A snapshot whose RAM region is zero bytes long yields an empty page list
/// without mapping anything, since a zero-length `mmap` is rejected by the OS.
///
/// # Errors
/// Propagates `load_meta` errors, and returns [`FileError::Io`] if the file
/// cannot be opened or mapped.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn load_compact_from_file(path: &str) -> Result<CompactSnapshot, FileError> {
    use std::os::fd::AsRawFd;

    /// Unmaps the region on drop so every exit path releases it.
    struct Mapping {
        ptr: *mut libc::c_void,
        len: usize,
    }
    impl Drop for Mapping {
        fn drop(&mut self) {
            // SAFETY: `ptr`/`len` describe a mapping returned by a successful
            // `mmap` call that has not been unmapped yet.
            unsafe {
                libc::munmap(self.ptr, self.len);
            }
        }
    }

    let (snap, ram_offset, memory_bytes) = load_meta(path)?;

    let pages: Vec<(usize, Box<[u8; 4096]>)> = if memory_bytes == 0 {
        Vec::new()
    } else {
        let f = std::fs::File::open(path)?;
        // SAFETY: `f` is an open descriptor and the arguments request a
        // private read-only mapping of `memory_bytes` bytes at `ram_offset`.
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                memory_bytes,
                libc::PROT_READ,
                libc::MAP_PRIVATE,
                f.as_raw_fd(),
                ram_offset as libc::off_t,
            )
        };
        if ptr == libc::MAP_FAILED {
            return Err(FileError::Io(std::io::Error::last_os_error()));
        }
        let mapping = Mapping {
            ptr,
            len: memory_bytes,
        };
        // Advisory only; the result is deliberately ignored.
        // SAFETY: the range is exactly the mapping created above.
        unsafe {
            let _ = libc::madvise(mapping.ptr, mapping.len, libc::MADV_SEQUENTIAL);
        }
        // SAFETY: the mapping is readable for `memory_bytes` bytes and stays
        // alive until `mapping` is dropped at the end of this block, after the
        // borrowed slice is no longer used.
        let memory: &[u8] =
            unsafe { std::slice::from_raw_parts(mapping.ptr as *const u8, mapping.len) };
        capture_compact_pages_serial(memory, memory_bytes / COMPACT_PAGE_SIZE)
    };

    Ok(CompactSnapshot {
        captured_host_ticks: snap.captured_host_ticks,
        captured_clock_ref: snap.captured_clock_ref,
        ram_gpa: snap.ram_gpa,
        ram_size: memory_bytes,
        intc_blob: snap.intc_blob,
        per_vcpu: snap.per_vcpu,
        virtio: snap.virtio,
        pages,
    })
}
/// Writes `snap` to `out_path` as a clone of an existing base snapshot file,
/// touching only the pages that differ from `base`.
///
/// Steps: clone `base_path` to `{out_path}.partial`; read the RAM offset from
/// the cloned header; overwrite the header/metadata area with `snap`'s
/// metadata; write pages that are new or changed relative to `base`; turn
/// pages that were non-zero in `base` but are absent from `snap` back into
/// zeros (hole punch, falling back to writing zeros); rename onto `out_path`.
///
/// # Errors
/// Returns `FileError::Io` if cloning, any file operation or the rename
/// fails, if the two snapshots have different `ram_size`, or if `snap`'s
/// metadata does not fit before the base file's RAM offset. The partial file
/// is removed on the two explicit validation failures; other error paths
/// leave it in place.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn save_compact_to_file_via_clone(
snap: &CompactSnapshot,
base: &CompactSnapshot,
base_path: &str,
out_path: &str,
) -> Result<SnapshotWriteStats, FileError> {
use std::collections::{HashMap, HashSet};
use std::os::unix::fs::FileExt;
let partial = format!("{out_path}.partial");
// Remove any stale partial first; the result is ignored.
let _ = std::fs::remove_file(&partial);
clonefile_via_libc(base_path, &partial).map_err(FileError::Io)?;
let f = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(&partial)?;
// The clone carries the base header, so its RAM offset field tells us where
// the RAM region starts in the new file as well.
let mut ram_off_bytes = [0u8; 8];
f.read_exact_at(&mut ram_off_bytes, RAM_OFFSET_FIELD_POS)?;
let ram_offset = u64::from_le_bytes(ram_off_bytes);
let base_ram_size = base.ram_size as u64;
if base_ram_size != snap.ram_size as u64 {
let _ = std::fs::remove_file(&partial);
return Err(FileError::Io(std::io::Error::other(format!(
"diff snapshot: base ram_size {base_ram_size} != warm ram_size {}",
snap.ram_size
))));
}
let meta = encode_compact_meta(snap, ram_offset);
// The new metadata must fit in the space before the RAM region.
if (meta.len() as u64) > ram_offset {
let _ = std::fs::remove_file(&partial);
return Err(FileError::Io(std::io::Error::other(format!(
"diff snapshot: warm meta {} bytes overflows base ram_offset {ram_offset}",
meta.len()
))));
}
// Zero-pad up to the RAM offset so the whole pre-RAM area is rewritten.
let mut padded = meta;
padded.resize(ram_offset as usize, 0);
f.write_all_at(&padded, 0)?;
let base_lookup: HashMap<usize, &[u8; 4096]> =
base.pages.iter().map(|(o, p)| (*o, p.as_ref())).collect();
let warm_offsets: HashSet<usize> = snap.pages.iter().map(|(o, _)| *o).collect();
// Pages to write: present in `snap` and either absent from `base` or different.
let mut data_pages: Vec<(usize, &[u8; 4096])> = Vec::new();
for (off, warm_p) in &snap.pages {
let warm_bytes: &[u8; 4096] = warm_p.as_ref();
match base_lookup.get(off) {
None => data_pages.push((*off, warm_bytes)),
Some(b) if *b != warm_bytes => data_pages.push((*off, warm_bytes)),
_ => {}
}
}
// Pages to zero: non-zero in `base` but not present in `snap`.
let mut zero_offsets: Vec<usize> = Vec::new();
for (off, _) in &base.pages {
if !warm_offsets.contains(off) {
zero_offsets.push(*off);
}
}
// Sorted so contiguous offsets can be merged into runs below.
zero_offsets.sort_unstable();
let n_threads = snapshot_write_threads();
let data_bytes_written = if n_threads <= 1 || data_pages.len() < 64 {
let mut bytes = 0u64;
for (off, page) in &data_pages {
f.write_all_at(*page, ram_offset + *off as u64)?;
bytes += COMPACT_PAGE_SIZE as u64;
}
bytes
} else {
save_diff_pages_parallel(&f, &data_pages, ram_offset, n_threads)?
};
let mut hole_bytes = 0u64;
if !zero_offsets.is_empty() {
use std::os::unix::io::AsRawFd;
let fd = f.as_raw_fd();
let mut i = 0;
while i < zero_offsets.len() {
// Merge consecutive page offsets into one [run_start, run_end) span.
let run_start = zero_offsets[i];
let mut run_end = run_start + COMPACT_PAGE_SIZE;
i += 1;
while i < zero_offsets.len() && zero_offsets[i] == run_end {
run_end += COMPACT_PAGE_SIZE;
i += 1;
}
let span = (run_end - run_start) as i64;
let punch = libc::fpunchhole_t {
fp_flags: 0,
reserved: 0,
fp_offset: (ram_offset + run_start as u64) as libc::off_t,
fp_length: span,
};
let r =
unsafe { libc::fcntl(fd, libc::F_PUNCHHOLE, &punch as *const libc::fpunchhole_t) };
if r == 0 {
hole_bytes += span as u64;
} else {
// Hole punching failed: overwrite the span with explicit zeros instead.
let zeros = vec![0u8; span as usize];
f.write_all_at(&zeros, ram_offset + run_start as u64)?;
}
}
}
// NOTE(review): punched hole bytes are added to the "data" count here, while
// zeros written by the fallback path are not counted at all — confirm that
// this is the intended meaning of the stats for clone-based saves.
let data_bytes = data_bytes_written + hole_bytes;
drop(f);
std::fs::rename(&partial, out_path)?;
Ok(SnapshotWriteStats {
ram_bytes: snap.ram_size as u64,
ram_data_bytes: data_bytes,
ram_zero_bytes: snap.ram_size as u64 - data_bytes,
})
}
/// Writes the borrowed `pages` into the RAM region of `f` using up to
/// `n_threads` scoped worker threads and positional writes.
///
/// `pages` is split into contiguous chunks, one per worker; each page is
/// written at `ram_offset + page offset`.
///
/// Returns the number of bytes written. An empty `pages` slice returns
/// `Ok(0)`; previously it panicked because `chunks` was asked for a chunk
/// size of zero.
///
/// # Errors
/// Returns an I/O error if duplicating the file handle or any write fails, or
/// if a worker thread panics.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn save_diff_pages_parallel(
    f: &std::fs::File,
    pages: &[(usize, &[u8; 4096])],
    ram_offset: u64,
    n_threads: usize,
) -> Result<u64, FileError> {
    use std::os::unix::fs::FileExt;
    if pages.is_empty() {
        return Ok(0);
    }
    let workers = n_threads.max(1);
    // Non-zero because `pages` is non-empty, so `chunks` below cannot panic.
    let per_thread = pages.len().div_ceil(workers);

    let total: u64 = std::thread::scope(|s| -> Result<u64, std::io::Error> {
        let mut handles = Vec::with_capacity(workers);
        for chunk in pages.chunks(per_thread) {
            let f_clone = f.try_clone()?;
            let h = s.spawn(move || -> std::io::Result<u64> {
                let mut bytes = 0u64;
                for (off, page) in chunk {
                    f_clone.write_all_at(*page, ram_offset + *off as u64)?;
                    bytes += COMPACT_PAGE_SIZE as u64;
                }
                Ok(bytes)
            });
            handles.push(h);
        }
        let mut total = 0u64;
        for h in handles {
            total += h
                .join()
                .map_err(|_| std::io::Error::other("snapshot diff write thread panicked"))??;
        }
        Ok(total)
    })?;
    Ok(total)
}
/// Captures `vm` and writes the snapshot straight to `path` without first
/// copying guest RAM into an intermediate buffer.
///
/// The boot vCPU state and interrupt-controller blob are captured, the host
/// tick counter is sampled and turned into a clock reference, and guest RAM
/// is handed to the file writer as a borrowed slice. `secondary_states` are
/// cloned after the boot vCPU state.
///
/// # Errors
/// [`SnapshotStreamError::Hvf`] for hypervisor capture failures,
/// [`SnapshotStreamError::Io`] for file write failures.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn capture_and_save_streaming(
    vm: &MicroVm,
    virtio: &VirtioSnapshot,
    secondary_states: &[PerVcpuState],
    path: &str,
) -> Result<SnapshotWriteStats, SnapshotStreamError> {
    let boot_state = match vm.vcpu.capture_snapshot() {
        Ok(state) => state,
        Err(e) => return Err(SnapshotStreamError::Hvf(e)),
    };
    let intc_blob = match vm.vm.capture_intc() {
        Ok(blob) => blob,
        Err(e) => return Err(SnapshotStreamError::Hvf(e)),
    };

    let captured_host_ticks = crate::hypervisor::ActiveVm::host_monotonic_ticks();
    let captured_clock_ref =
        <crate::hypervisor::ActiveVcpu as crate::hypervisor::HypervisorVcpu>::capture_clock_ref(
            &boot_state,
            captured_host_ticks,
        );

    let mut per_vcpu = Vec::with_capacity(1 + secondary_states.len());
    per_vcpu.push(boot_state);
    per_vcpu.extend(secondary_states.iter().cloned());

    // SAFETY: this relies on `vm.ram_host` pointing at `vm.ram_size` readable
    // bytes that stay mapped for the duration of the write, which is an
    // invariant of `MicroVm` not visible from here.
    let memory: &[u8] = unsafe { std::slice::from_raw_parts(vm.ram_host, vm.ram_size) };

    let written = write_snapshot_file(
        path,
        captured_host_ticks,
        captured_clock_ref,
        vm.ram_gpa,
        memory,
        &intc_blob,
        &per_vcpu,
        virtio,
    );
    match written {
        Ok(stats) => Ok(stats),
        Err(e) => Err(SnapshotStreamError::Io(e)),
    }
}
/// Error returned by the capture paths that combine hypervisor capture with
/// file output.
#[derive(Debug)]
pub enum SnapshotStreamError {
/// A hypervisor capture call failed.
Hvf(crate::hypervisor::ActiveError),
/// Writing the snapshot file failed.
Io(FileError),
}
impl std::fmt::Display for SnapshotStreamError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Hvf(e) => write!(f, "snapshot capture: {e:?}"),
Self::Io(e) => write!(f, "snapshot save: {e:?}"),
}
}
}
pub fn save_to_file_with_stats(
path: &str,
snap: &Snapshot,
) -> Result<SnapshotWriteStats, FileError> {
write_snapshot_file(
path,
snap.captured_host_ticks,
snap.captured_clock_ref,
snap.ram_gpa,
&snap.memory,
&snap.intc_blob,
&snap.per_vcpu,
&snap.virtio,
)
}
/// Writes a full snapshot file to `path`.
///
/// The data goes to `{path}.partial` first: header and metadata, zero padding
/// up to the next `RAM_PAGE_ALIGN` boundary, then guest RAM written sparsely.
/// The RAM offset field in the header is patched last and the file is renamed
/// onto `path`.
///
/// # Errors
/// Any I/O failure is returned as [`FileError`]; the partial file is not
/// removed in that case.
#[allow(clippy::too_many_arguments)]
fn write_snapshot_file(
    path: &str,
    captured_host_ticks: u64,
    captured_clock_ref: u64,
    ram_gpa: u64,
    memory: &[u8],
    intc_blob: &[u8],
    per_vcpu: &[PerVcpuState],
    virtio: &VirtioSnapshot,
) -> Result<SnapshotWriteStats, FileError> {
    use std::io::{Seek, SeekFrom};
    let partial = format!("{path}.partial");
    let mut out = std::fs::File::create(&partial)?;

    let container = hvf_container_meta(
        captured_host_ticks,
        captured_clock_ref,
        memory.len() as u64,
        intc_blob,
        per_vcpu,
        virtio,
    );
    // Header is written with a placeholder RAM offset of 0.
    let header = encode_hvf_meta(ram_gpa, 0, &container);
    out.write_all(&header)?;

    let header_end = out.stream_position()?;
    let misalign = header_end % RAM_PAGE_ALIGN;
    if misalign != 0 {
        let pad = (RAM_PAGE_ALIGN - misalign) as usize;
        out.write_all(&vec![0u8; pad])?;
    }

    let ram_offset = out.stream_position()?;
    let stats = write_sparse_ram(&mut out, memory, ram_offset)?;

    // Patch the real RAM offset into the header now that it is known.
    out.seek(SeekFrom::Start(RAM_OFFSET_FIELD_POS))?;
    out.write_all(&ram_offset.to_le_bytes())?;
    drop(out);
    std::fs::rename(&partial, path)?;
    Ok(stats)
}
/// Number of threads the snapshot writers should use.
///
/// Reads `SUPERMACHINE_SNAPSHOT_WRITE_THREADS`; a value that parses as
/// `usize` is used (raised to at least 1). If the variable is unset or does
/// not parse, the default of 4 is returned.
fn snapshot_write_threads() -> usize {
    let configured = std::env::var("SUPERMACHINE_SNAPSHOT_WRITE_THREADS")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok());
    match configured {
        Some(n) => n.max(1),
        None => 4,
    }
}
/// Writes `memory` into `f` starting at `ram_offset`, skipping all-zero
/// chunks so they remain holes.
///
/// The file is first extended to cover the whole RAM region, then the data is
/// written serially or in parallel depending on `snapshot_write_threads()`.
/// On return the file cursor sits at the end of the RAM region.
fn write_sparse_ram(
    f: &mut std::fs::File,
    memory: &[u8],
    ram_offset: u64,
) -> Result<SnapshotWriteStats, FileError> {
    use std::io::{Seek, SeekFrom};
    let n_threads = snapshot_write_threads();
    let logical_end = ram_offset + memory.len() as u64;
    f.set_len(logical_end)?;

    let stats = match n_threads {
        0 | 1 => write_sparse_ram_serial(f, memory, ram_offset)?,
        _ => write_sparse_ram_parallel(f, memory, ram_offset, n_threads)?,
    };

    f.seek(SeekFrom::Start(logical_end))?;
    Ok(stats)
}
fn write_sparse_ram_serial(
f: &std::fs::File,
memory: &[u8],
ram_offset: u64,
) -> Result<SnapshotWriteStats, FileError> {
use std::os::unix::fs::FileExt;
let mut pos = 0usize;
let mut data_bytes = 0u64;
let mut zero_bytes = 0u64;
while pos < memory.len() {
let run_is_zero =
is_zero_chunk(&memory[pos..(pos + (memory.len() - pos).min(SPARSE_RAM_CHUNK))]);
let run_start = pos;
pos += (memory.len() - pos).min(SPARSE_RAM_CHUNK);
while pos < memory.len() {
let next_len = (memory.len() - pos).min(SPARSE_RAM_CHUNK);
let next_is_zero = is_zero_chunk(&memory[pos..pos + next_len]);
if next_is_zero != run_is_zero {
break;
}
pos += next_len;
}
let run_len = pos - run_start;
if run_is_zero {
zero_bytes += run_len as u64;
} else {
f.write_all_at(&memory[run_start..pos], ram_offset + run_start as u64)?;
data_bytes += run_len as u64;
}
}
Ok(SnapshotWriteStats {
ram_bytes: memory.len() as u64,
ram_data_bytes: data_bytes,
ram_zero_bytes: zero_bytes,
})
}
/// Writes `memory` into `f` at `ram_offset` using up to `n_threads` scoped
/// worker threads, skipping all-zero runs.
///
/// `memory` is divided into slabs whose size is a multiple of
/// `SPARSE_RAM_CHUNK`; each worker gets its own duplicated file handle and
/// writes its slab with positional writes. The per-slab byte counts are
/// summed into the returned statistics.
///
/// `n_threads == 0` is treated as 1, and empty `memory` returns zeroed
/// statistics without spawning anything.
///
/// # Errors
/// Returns [`FileError::Io`] if duplicating the file handle or any write
/// fails, or if a worker thread panics.
fn write_sparse_ram_parallel(
    f: &std::fs::File,
    memory: &[u8],
    ram_offset: u64,
    n_threads: usize,
) -> Result<SnapshotWriteStats, FileError> {
    let total = memory.len();
    if total == 0 {
        return Ok(SnapshotWriteStats {
            ram_bytes: 0,
            ram_data_bytes: 0,
            ram_zero_bytes: 0,
        });
    }
    let workers = n_threads.max(1);
    // Round the slab size up to a whole number of chunks so zero detection
    // uses the same chunk boundaries regardless of the thread count.
    let slab = total.div_ceil(workers).div_ceil(SPARSE_RAM_CHUNK) * SPARSE_RAM_CHUNK;

    // Scoped threads may borrow sub-slices of `memory` directly; no
    // raw-pointer round trip or `unsafe` is needed.
    let (data_bytes, zero_bytes) =
        std::thread::scope(|s| -> Result<(u64, u64), std::io::Error> {
            let mut handles = Vec::with_capacity(workers);
            for (idx, region) in memory.chunks(slab).enumerate() {
                let f_clone = f.try_clone()?;
                let base = ram_offset + (idx * slab) as u64;
                handles.push(s.spawn(move || write_sparse_ram_slab(&f_clone, region, base)));
            }
            let mut data_bytes = 0u64;
            let mut zero_bytes = 0u64;
            for h in handles {
                let (d, z) = h
                    .join()
                    .map_err(|_| std::io::Error::other("snapshot write thread panicked"))??;
                data_bytes += d;
                zero_bytes += z;
            }
            Ok((data_bytes, zero_bytes))
        })?;

    Ok(SnapshotWriteStats {
        ram_bytes: total as u64,
        ram_data_bytes: data_bytes,
        ram_zero_bytes: zero_bytes,
    })
}
/// Writes one slab of RAM into `f`, skipping all-zero runs.
///
/// `memory` is examined in `SPARSE_RAM_CHUNK`-sized pieces (the last may be
/// shorter). Consecutive pieces with the same zero/non-zero classification
/// form a run; non-zero runs are written with one positional write at
/// `base_offset + run start`, and zero runs are only counted.
///
/// Returns `(data_bytes, zero_bytes)`.
fn write_sparse_ram_slab(
    f: &std::fs::File,
    memory: &[u8],
    base_offset: u64,
) -> std::io::Result<(u64, u64)> {
    use std::os::unix::fs::FileExt;
    let mut data_bytes = 0u64;
    let mut zero_bytes = 0u64;

    let mut pieces = memory.chunks(SPARSE_RAM_CHUNK).peekable();
    let mut cursor = 0usize;
    while let Some(first) = pieces.next() {
        let run_is_zero = is_zero_chunk(first);
        let run_start = cursor;
        cursor += first.len();
        // Absorb following pieces that have the same classification.
        while let Some(next) = pieces.next_if(|piece| is_zero_chunk(piece) == run_is_zero) {
            cursor += next.len();
        }

        let run_len = (cursor - run_start) as u64;
        if run_is_zero {
            zero_bytes += run_len;
        } else {
            f.write_all_at(&memory[run_start..cursor], base_offset + run_start as u64)?;
            data_bytes += run_len;
        }
    }
    Ok((data_bytes, zero_bytes))
}
/// Returns `true` when `chunk` contains no non-zero byte (an empty chunk counts
/// as zero).
fn is_zero_chunk(chunk: &[u8]) -> bool {
    !chunk.iter().any(|&byte| byte != 0)
}
/// Loads a complete snapshot, including the guest RAM image, from `path`.
///
/// # Errors
/// Returns whatever `load_from_file_inner` reports for an unreadable,
/// truncated or malformed file.
pub fn load_from_file(path: &str) -> Result<Snapshot, FileError> {
    let (snapshot, _ram_offset, _memory_bytes) = load_from_file_inner(path, false)?;
    Ok(snapshot)
}
/// Loads everything except the guest RAM image from the snapshot at `path`.
///
/// Returns the snapshot (with an empty `memory` vector), the file offset at
/// which the RAM image starts, and the RAM size in bytes.
///
/// # Errors
/// Returns whatever `load_from_file_inner` reports for an unreadable,
/// truncated or malformed file.
pub fn load_meta(path: &str) -> Result<(Snapshot, u64, usize), FileError> {
    let skip_ram = true;
    load_from_file_inner(path, skip_ram)
}
/// Shared implementation behind `load_from_file` and `load_meta`.
///
/// Reads the fixed header prefix (magic at bytes 0..8, version at 8..16, RAM
/// guest-physical address at 16..24, RAM file offset at 24..32), then the
/// container metadata, checks that the declared RAM region lies inside the
/// file, decodes every vCPU blob and, unless `skip_ram` is set, reads the RAM
/// image from `ram_offset`.
///
/// Returns the snapshot, the RAM file offset and the RAM size in bytes. With
/// `skip_ram` the snapshot's `memory` is left empty.
///
/// # Errors
/// * `FileError::Truncated` if the header, container metadata, a vCPU blob or
///   the RAM image cannot be read, or the RAM region does not fit in the file.
/// * `FileError::BadMagic` / `FileError::BadVersion` for a foreign or
///   incompatible header.
/// * `FileError::Malformed` if the container carries no vCPU state.
/// * I/O errors from opening the file or querying its metadata are propagated.
fn load_from_file_inner(path: &str, skip_ram: bool) -> Result<(Snapshot, u64, usize), FileError> {
    use std::io::{Seek, SeekFrom};

    let mut file = std::fs::File::open(path)?;

    // --- fixed-size header prefix ---
    let mut header = [0u8; HVF_HDR_PREFIX];
    if file.read_exact(&mut header).is_err() {
        return Err(FileError::Truncated);
    }
    if header[0..8] != SNAPSHOT_MAGIC {
        return Err(FileError::BadMagic);
    }
    match le_u64(&header[8..16])? {
        SNAPSHOT_VERSION => {}
        other => return Err(FileError::BadVersion(other)),
    }
    let ram_gpa = le_u64(&header[16..24])?;
    let ram_offset = le_u64(&header[24..32])?;

    // --- container metadata ---
    let Ok(cm) = crate::snapshot_frame::ContainerMeta::read_container(&mut file) else {
        return Err(FileError::Truncated);
    };
    let memory_bytes = cm.mem_size as usize;

    // Refuse a RAM region the file cannot back before allocating or mapping it.
    let file_len = file.metadata()?.len();
    if !crate::snapshot_frame::ram_region_within(file_len, ram_offset, memory_bytes as u64) {
        return Err(FileError::Truncated);
    }
    if cm.vcpu_blobs.is_empty() {
        return Err(FileError::Malformed("snapshot contains no vCPU state"));
    }

    // --- per-vCPU state ---
    let per_vcpu = cm
        .vcpu_blobs
        .iter()
        .map(|blob| {
            <crate::hypervisor::ActiveVcpu as crate::hypervisor::HypervisorVcpu>::read_snapshot_state(
                &mut std::io::Cursor::new(blob),
            )
            .map_err(|_| FileError::Truncated)
        })
        .collect::<Result<Vec<_>, FileError>>()?;

    // --- RAM image (optional) ---
    let mut memory = vec![0u8; if skip_ram { 0 } else { memory_bytes }];
    if !skip_ram {
        file.seek(SeekFrom::Start(ram_offset))
            .map_err(|_| FileError::Truncated)?;
        file.read_exact(&mut memory)
            .map_err(|_| FileError::Truncated)?;
    }

    let snapshot = Snapshot {
        captured_host_ticks: cm.clock_host_ticks,
        captured_clock_ref: cm.clock_ref,
        ram_gpa,
        memory,
        intc_blob: cm.intc_blob,
        per_vcpu,
        virtio: VirtioSnapshot {
            devices: cm.devices,
            vsock_listeners: cm.vsock_listeners,
        },
    };
    Ok((snapshot, ram_offset, memory_bytes))
}
/// Maps the RAM image of the snapshot at `path`, locating it via `load_meta`.
///
/// Returns the pointer and length produced by `mmap_ram_cow_at` for the RAM
/// offset and size recorded in the snapshot.
///
/// # Errors
/// A `load_meta` failure is wrapped in an `std::io::Error` whose message starts
/// with "load_meta: "; mapping errors are returned unchanged.
pub fn mmap_ram_cow(path: &str) -> std::io::Result<(*mut u8, usize)> {
    match load_meta(path) {
        Ok((_snap, ram_offset, memory_bytes)) => mmap_ram_cow_at(path, ram_offset, memory_bytes),
        Err(e) => Err(std::io::Error::other(format!("load_meta: {e:?}"))),
    }
}
/// Opens `path` and maps `memory_bytes` bytes starting at `ram_offset` through
/// `crate::snapshot_frame::cow_map_ram`.
///
/// Returns the mapping's base pointer together with `memory_bytes`. The file
/// handle opened here is dropped on return.
/// NOTE(review): the mapping presumably stays valid after the handle is
/// closed, and the caller is presumably responsible for unmapping it; confirm
/// against `cow_map_ram`.
///
/// # Errors
/// Propagates failures from opening the file or from `cow_map_ram`.
pub fn mmap_ram_cow_at(
    path: &str,
    ram_offset: u64,
    memory_bytes: usize,
) -> std::io::Result<(*mut u8, usize)> {
    let file = std::fs::File::open(path)?;
    Ok((
        crate::snapshot_frame::cow_map_ram(&file, ram_offset, memory_bytes, 0)?,
        memory_bytes,
    ))
}
#[cfg(test)]
mod tests {
    // Tests for the snapshot file format: zero-page detection, TSI listener
    // records, sidecar files, save/load round-trips, and hardening of the
    // loaders against truncated, lying or arbitrary input.
    use super::*;
    use std::io::Write;
    use std::path::{Path, PathBuf};

    // ---- page_is_zero ----

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_all_zeros() {
        let page = [0u8; 4096];
        assert!(page_is_zero(&page));
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_first_byte_nonzero() {
        let mut page = [0u8; 4096];
        page[0] = 1;
        assert!(!page_is_zero(&page));
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_last_byte_nonzero() {
        let mut page = [0u8; 4096];
        page[4095] = 1;
        assert!(!page_is_zero(&page));
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_middle_byte_nonzero() {
        let mut page = [0u8; 4096];
        page[2048] = 0xFF;
        assert!(!page_is_zero(&page));
    }

    /// Exhaustively checks that a single non-zero byte is detected at every
    /// position of a 4 KiB page.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_every_byte_position() {
        for k in 0..4096 {
            let mut page = [0u8; 4096];
            page[k] = 1;
            assert!(!page_is_zero(&page), "missed non-zero byte at position {k}");
        }
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    fn page_is_zero_all_ones() {
        let page = [0xFFu8; 4096];
        assert!(!page_is_zero(&page));
    }

    // ---- TSI listener records and sidecars ----

    /// Every field of a TSI listener record, including an optional
    /// `inet_port`, must survive an encode/decode round-trip.
    #[test]
    fn tsi_listener_record_roundtrips_inet_port() {
        let cases = [Some(9222u16), Some(80), Some(65535), None];
        for inet_port in cases {
            let l = TsiListenerSnapshot {
                cid: 3,
                peer_port: 1234,
                vm_port: 8080,
                family: 2,
                socktype: 1,
                inet_port,
            };
            let mut buf = Vec::new();
            write_tsi_listener_record(&mut buf, &l);
            assert_eq!(
                buf.len(),
                TSI_LISTENER_RECORD_LEN,
                "record must be exactly {TSI_LISTENER_RECORD_LEN} bytes"
            );
            let e: [u8; TSI_LISTENER_RECORD_LEN] = buf.try_into().unwrap();
            let got = read_tsi_listener_record(&e).expect("decode");
            assert_eq!(got.cid, l.cid);
            assert_eq!(got.peer_port, l.peer_port);
            assert_eq!(got.vm_port, l.vm_port);
            assert_eq!(got.family, l.family);
            assert_eq!(got.socktype, l.socktype);
            assert_eq!(
                got.inet_port, inet_port,
                "inet_port must survive the record round-trip (dropped pre-v10)"
            );
        }
    }

    /// An absent smpark sidecar reads as `None`; a written GPA reads back.
    #[test]
    fn smpark_sidecar_roundtrips_gpa() {
        let snap = temp_snapshot_path("smpark-rt");
        let snap_s = snap.to_str().unwrap();
        let _ = std::fs::remove_file(smpark_sidecar_path(snap_s));
        assert_eq!(read_smpark_sidecar(snap_s), None, "absent sidecar → None");
        write_smpark_sidecar(snap_s, 0x8_0000_1234).expect("write smpark sidecar");
        assert_eq!(
            read_smpark_sidecar(snap_s),
            Some(0x8_0000_1234),
            "smpark GPA must round-trip through the sidecar"
        );
        let _ = std::fs::remove_file(smpark_sidecar_path(snap_s));
    }

    // ---- shared helpers ----

    /// Builds a temp-dir path that is unique per process, thread and `name`.
    fn temp_snapshot_path(name: &str) -> PathBuf {
        let mut path = std::env::temp_dir();
        path.push(format!(
            "snapshot-{name}-{}-{:?}.snap",
            std::process::id(),
            std::thread::current().id()
        ));
        path
    }

    /// Creates (or truncates) `path` and writes `bytes` into it.
    fn write_bytes(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
        let mut file = std::fs::File::create(path)?;
        file.write_all(bytes)
    }

    /// Assembles a snapshot file image: magic, current version, `ram_gpa`,
    /// `ram_offset`, followed by the raw container `body`.
    fn hvf_file(ram_gpa: u64, ram_offset: u64, body: &[u8]) -> Vec<u8> {
        let mut v = Vec::with_capacity(HVF_HDR_PREFIX + body.len());
        v.extend_from_slice(&SNAPSHOT_MAGIC);
        v.extend_from_slice(&SNAPSHOT_VERSION.to_le_bytes());
        v.extend_from_slice(&ram_gpa.to_le_bytes());
        v.extend_from_slice(&ram_offset.to_le_bytes());
        v.extend_from_slice(body);
        v
    }

    /// Serialises a container with `n_vcpus` default vCPU states and the given
    /// declared `mem_size`.
    fn container_body(n_vcpus: usize, mem_size: u64) -> Vec<u8> {
        let per_vcpu: Vec<PerVcpuState> = (0..n_vcpus).map(|_| PerVcpuState::default()).collect();
        let cm = hvf_container_meta(0, 0, mem_size, &[], &per_vcpu, &VirtioSnapshot::default());
        let mut b = Vec::new();
        cm.write_container(&mut b).unwrap();
        b
    }

    // ---- loader rejection tests ----

    #[test]
    fn load_rejects_truncated_snapshot() -> std::io::Result<()> {
        let path = temp_snapshot_path("truncated");
        write_bytes(&path, b"SMS")?;
        let result = load_from_file(path.to_str().expect("temp path is valid UTF-8"));
        let _ = std::fs::remove_file(path);
        assert!(matches!(result, Err(FileError::Truncated)));
        Ok(())
    }

    #[test]
    fn load_rejects_bad_magic() -> std::io::Result<()> {
        let path = temp_snapshot_path("bad-magic");
        write_bytes(&path, &[0u8; 72])?;
        let result = load_from_file(path.to_str().expect("temp path is valid UTF-8"));
        let _ = std::fs::remove_file(path);
        assert!(matches!(result, Err(FileError::BadMagic)));
        Ok(())
    }

    #[test]
    fn load_rejects_missing_vcpu_state() -> std::io::Result<()> {
        let path = temp_snapshot_path("no-vcpu");
        let body = container_body(0, 0);
        let ram_offset = (HVF_HDR_PREFIX + body.len()) as u64;
        write_bytes(&path, &hvf_file(0, ram_offset, &body))?;
        let result = load_from_file(path.to_str().expect("temp path is valid UTF-8"));
        let _ = std::fs::remove_file(path);
        assert!(matches!(
            result,
            Err(FileError::Malformed("snapshot contains no vCPU state"))
        ));
        Ok(())
    }

    // ---- save/load round-trips ----

    /// A single-vCPU snapshot with recognisable field values around `memory`.
    fn sample_snapshot(memory: Vec<u8>) -> Snapshot {
        Snapshot {
            captured_host_ticks: 0x0123_4567_89AB_CDEF,
            captured_clock_ref: 0xFEDC_BA98_7654_3210,
            ram_gpa: 0x4000_0000,
            memory,
            intc_blob: vec![0xDE, 0xAD, 0xBE, 0xEF],
            per_vcpu: vec![PerVcpuState {
                gp_regs: vec![(0, 0x1111_2222), (1, 0x3333_4444)],
                vtimer_offset: 0x9999,
                ..Default::default()
            }],
            virtio: VirtioSnapshot::default(),
        }
    }

    /// RAM that is mostly zero, with data at the start and near the end, must
    /// come back byte-exact, and no `.partial` file may be left behind.
    #[test]
    fn save_load_round_trips_sparse_ram() {
        let path = temp_snapshot_path("roundtrip-sparse");
        let path_s = path.to_str().unwrap().to_string();
        let mut memory = vec![0u8; 16 * 4096];
        for (i, b) in memory[..4096].iter_mut().enumerate() {
            *b = (i % 251) as u8;
        }
        memory[14 * 4096 + 100] = 0xAB;
        memory[15 * 4096 + 4095] = 0xCD;
        let expected = memory.clone();
        save_to_file(&path_s, &sample_snapshot(memory)).expect("save");
        assert!(path.exists(), "snapshot must exist after save");
        assert!(
            !PathBuf::from(format!("{path_s}.partial")).exists(),
            "no leftover .partial after a successful save"
        );
        let loaded = load_from_file(&path_s).expect("load");
        let _ = std::fs::remove_file(&path);
        assert_eq!(loaded.ram_gpa, 0x4000_0000);
        assert_eq!(loaded.captured_host_ticks, 0x0123_4567_89AB_CDEF);
        assert_eq!(loaded.captured_clock_ref, 0xFEDC_BA98_7654_3210);
        assert_eq!(loaded.memory.len(), expected.len(), "RAM length preserved");
        assert!(loaded.memory == expected, "RAM must round-trip byte-exact");
        assert_eq!(loaded.per_vcpu.len(), 1);
        assert_eq!(loaded.intc_blob, vec![0xDE, 0xAD, 0xBE, 0xEF]);
    }

    #[test]
    fn save_load_round_trips_all_zero_ram() {
        let path = temp_snapshot_path("roundtrip-zero");
        let path_s = path.to_str().unwrap().to_string();
        let len = 8 * 4096;
        save_to_file(&path_s, &sample_snapshot(vec![0u8; len])).expect("save");
        let loaded = load_from_file(&path_s).expect("load");
        let _ = std::fs::remove_file(&path);
        assert_eq!(loaded.memory.len(), len, "zero RAM length preserved");
        assert!(
            loaded.memory.iter().all(|&b| b == 0),
            "all bytes still zero"
        );
    }

    #[test]
    fn save_load_preserves_per_vcpu_registers() {
        let path = temp_snapshot_path("roundtrip-regs");
        let path_s = path.to_str().unwrap().to_string();
        save_to_file(&path_s, &sample_snapshot(vec![0u8; 4096])).expect("save");
        let loaded = load_from_file(&path_s).expect("load");
        let _ = std::fs::remove_file(&path);
        let v = &loaded.per_vcpu[0];
        assert_eq!(v.vtimer_offset, 0x9999, "vtimer_offset round-trips");
        assert!(
            v.gp_regs.contains(&(0, 0x1111_2222)) && v.gp_regs.contains(&(1, 0x3333_4444)),
            "general-purpose regs round-trip: {:?}",
            v.gp_regs
        );
    }

    // ---- sidecar parsers ----

    /// Builds a temp-dir path for a sidecar file, unique per process, thread
    /// and `name`.
    fn sidecar_path(name: &str) -> PathBuf {
        let mut path = std::env::temp_dir();
        path.push(format!(
            "sidecar-{name}-{}-{:?}.bin",
            std::process::id(),
            std::thread::current().id()
        ));
        path
    }

    /// A header announcing `u32::MAX` entries with no payload must be rejected
    /// as truncated. The three fields are presumably magic, version and record
    /// count; confirm against `read_dax_sidecar`.
    #[test]
    fn dax_sidecar_huge_count_does_not_ooms() -> std::io::Result<()> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(b"DAXC");
        bytes.extend_from_slice(&1u32.to_le_bytes());
        bytes.extend_from_slice(&u32::MAX.to_le_bytes());
        let path = sidecar_path("dax-huge");
        write_bytes(&path, &bytes)?;
        let r = read_dax_sidecar(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert!(matches!(r, Err(FileError::Truncated)));
        Ok(())
    }

    /// Same hardening check as the DAX variant, for the `PFXS` sidecar.
    #[test]
    fn posix_fs_sidecar_huge_count_does_not_ooms() -> std::io::Result<()> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(b"PFXS");
        bytes.extend_from_slice(&1u32.to_le_bytes());
        bytes.extend_from_slice(&u32::MAX.to_le_bytes());
        let path = sidecar_path("pfxs-huge");
        write_bytes(&path, &bytes)?;
        let r = read_posix_fs_sidecar(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert!(matches!(r, Err(FileError::Truncated)));
        Ok(())
    }

    /// A well-formed sidecar with one length-prefixed record decodes to that
    /// record's payload.
    #[test]
    fn dax_sidecar_valid_single_record_roundtrips() -> std::io::Result<()> {
        let payload = b"hello-dax-blob";
        let mut bytes = Vec::new();
        bytes.extend_from_slice(b"DAXC");
        bytes.extend_from_slice(&1u32.to_le_bytes());
        bytes.extend_from_slice(&1u32.to_le_bytes());
        bytes.extend_from_slice(&(payload.len() as u32).to_le_bytes());
        bytes.extend_from_slice(payload);
        let path = sidecar_path("dax-valid");
        write_bytes(&path, &bytes)?;
        let r = read_dax_sidecar(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert_eq!(r.unwrap(), Some(vec![payload.to_vec()]));
        Ok(())
    }

    // Fuzz: neither sidecar parser may panic on arbitrary bytes, with or
    // without a valid-looking magic prefix. Results are deliberately ignored.
    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(256))]
        #[test]
        fn sidecar_parsers_tolerate_arbitrary_bytes(
            prefix in proptest::prelude::prop_oneof![
                proptest::prelude::Just(b"DAXC".to_vec()),
                proptest::prelude::Just(b"PFXS".to_vec()),
                proptest::prelude::Just(Vec::<u8>::new()),
            ],
            body in proptest::collection::vec(proptest::prelude::any::<u8>(), 0..512),
        ) {
            let mut bytes = prefix.clone();
            bytes.extend_from_slice(&body);
            let path = sidecar_path("fuzz");
            std::fs::write(&path, &bytes).unwrap();
            let _ = read_dax_sidecar(path.to_str().unwrap());
            let _ = read_posix_fs_sidecar(path.to_str().unwrap());
            let _ = std::fs::remove_file(&path);
        }
    }

    // ---- loader hardening ----

    /// A container that declares 1 TiB of RAM in a tiny file must be rejected
    /// instead of triggering a huge allocation.
    #[test]
    fn load_rejects_lying_memory_bytes() -> std::io::Result<()> {
        let body = container_body(1, 1u64 << 40);
        let ram_offset = (HVF_HDR_PREFIX + body.len()) as u64;
        let path = temp_snapshot_path("lying-mem");
        write_bytes(&path, &hvf_file(0, ram_offset, &body))?;
        let r = load_from_file(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert!(matches!(r, Err(FileError::Truncated)));
        Ok(())
    }

    /// Hand-built container body whose final `u32::MAX` field is presumably
    /// the interrupt-controller blob length (per the test name); the loader
    /// must report truncation. Field meanings follow the container wire
    /// format, which is defined outside this module.
    #[test]
    fn load_rejects_huge_intc_blob_len() -> std::io::Result<()> {
        let mut body = Vec::new();
        body.push(1u8);
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&[0u8; 6]);
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&u32::MAX.to_le_bytes());
        let path = temp_snapshot_path("huge-gic");
        write_bytes(&path, &hvf_file(0, 4096, &body))?;
        let r = load_from_file(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert!(matches!(r, Err(FileError::Truncated)));
        Ok(())
    }

    /// Same layout as the previous test with one extra zero `u32` before the
    /// final `u32::MAX`, which is presumably the vCPU count (per the test
    /// name); the loader must report truncation.
    #[test]
    fn load_rejects_huge_n_vcpus() -> std::io::Result<()> {
        let mut body = Vec::new();
        body.push(1u8);
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&[0u8; 6]);
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&0u64.to_le_bytes());
        body.extend_from_slice(&0u32.to_le_bytes());
        body.extend_from_slice(&u32::MAX.to_le_bytes());
        let path = temp_snapshot_path("huge-vcpus");
        write_bytes(&path, &hvf_file(0, 4096, &body))?;
        let r = load_from_file(path.to_str().unwrap());
        let _ = std::fs::remove_file(&path);
        assert!(matches!(r, Err(FileError::Truncated)));
        Ok(())
    }

    #[test]
    fn mmap_ram_cow_at_rejects_region_past_eof() -> std::io::Result<()> {
        let path = temp_snapshot_path("mmap-oob");
        write_bytes(&path, &[0u8; 4096])?;
        let r = mmap_ram_cow_at(path.to_str().unwrap(), 0, 1 << 30);
        let _ = std::fs::remove_file(&path);
        assert!(r.is_err(), "mapping 1 GiB out of a 4 KiB file must fail");
        Ok(())
    }

    // Fuzz: with a valid magic and version, arbitrary `ram_gpa`, `ram_offset`
    // and container bytes must never make either loader panic. Results are
    // deliberately ignored.
    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig::with_cases(256))]
        #[test]
        fn loader_tolerates_arbitrary_header_sizes(
            ram_gpa in proptest::prelude::any::<u64>(),
            ram_offset in proptest::prelude::any::<u64>(),
            body in proptest::collection::vec(proptest::prelude::any::<u8>(), 0..512),
        ) {
            let bytes = hvf_file(ram_gpa, ram_offset, &body);
            let path = temp_snapshot_path("loader-fuzz");
            std::fs::write(&path, &bytes).unwrap();
            let _ = load_from_file(path.to_str().unwrap());
            let _ = load_meta(path.to_str().unwrap());
            let _ = std::fs::remove_file(&path);
        }
    }
}