#![cfg(all(target_os = "macos", target_arch = "aarch64"))]
use std::io::{Read, Write};
use applevisor_sys as av;
use crate::devices::virtio::mmio::{MmioSnapshot, QueueSnapshot};
use crate::devices::virtio::vsock::muxer::TsiListenerSnapshot;
use crate::hvf::{self, Vcpu};
use crate::vmm::vstate::MicroVm;
/// Eight-byte magic written at offset 0 of every snapshot file. The loader
/// rejects a file whose first eight bytes differ with `FileError::BadMagic`.
const SNAPSHOT_MAGIC: [u8; 8] = *b"SMSNAP\x08\x00";
/// Format version stored as a little-endian `u64` right after the magic. The
/// loader rejects any other value with `FileError::BadVersion`.
const SNAPSHOT_VERSION: u64 = 8;
/// Alignment, in bytes, of the RAM section inside a snapshot file: the writers
/// zero-pad the metadata so the RAM image starts on a multiple of this value.
const RAM_PAGE_ALIGN: u64 = 16384;
/// Granularity, in bytes, at which the sparse RAM writers test for all-zero
/// data; all-zero chunks are skipped instead of written.
const SPARSE_RAM_CHUNK: usize = 64 * 1024;
/// Register state captured from a single vCPU.
///
/// Every register list holds `(raw id, value)` pairs. The raw id is the
/// numeric value of the corresponding register enum (or, for the
/// redistributor, the register offset), so the lists can be written to and
/// read from a snapshot file without depending on the enum types.
#[derive(Default, Clone)]
pub struct PerVcpuState {
    /// General-purpose registers: `(hv_reg_t as u32, value)`.
    pub gp_regs: Vec<(u32, u64)>,
    /// SIMD/FP registers: `(hv_simd_fp_reg_t as u32, 128-bit value)`.
    pub simd_regs: Vec<(u32, u128)>,
    /// System registers: `(hv_sys_reg_t as u32, value)`.
    pub sys_regs: Vec<(u32, u64)>,
    /// GIC CPU-interface (ICC) registers: `(hv_gic_icc_reg_t as u32, value)`.
    pub icc_regs: Vec<(u32, u64)>,
    /// GIC redistributor registers: `(register offset, value)`.
    pub redist_regs: Vec<(u32, u64)>,
    /// Virtual-timer offset reported by the vCPU at capture time.
    pub vtimer_offset: u64,
}
/// Virtio device state carried alongside the CPU/RAM snapshot.
#[derive(Default)]
pub struct VirtioSnapshot {
// One entry per virtio-mmio transport; the writers serialise each entry's
// driver features, status, interrupt status and queue layout.
pub mmio: Vec<MmioSnapshot>,
// vsock (TSI) listeners; the writers serialise cid, ports, family and
// socket type for each entry.
pub vsock_listeners: Vec<TsiListenerSnapshot>,
}
/// A complete in-memory snapshot: full RAM copy plus GIC, vCPU and virtio state.
pub struct Snapshot {
// Value of `mach_absolute_time()` sampled during capture.
pub captured_mach_time: u64,
// `captured_mach_time` minus the boot vCPU's vtimer offset (wrapping); the
// restore path uses it to compute a new vtimer offset.
pub captured_cntvct: u64,
// Guest-physical base address of RAM (copied from `MicroVm::ram_gpa`).
pub ram_gpa: u64,
// Byte-for-byte copy of guest RAM.
pub memory: Vec<u8>,
// Opaque GIC state returned by `hvf::gic_state_capture()`.
pub gic_blob: Vec<u8>,
// Per-vCPU register state; element 0 is the boot vCPU.
pub per_vcpu: Vec<PerVcpuState>,
// Virtio transport and vsock listener state.
pub virtio: VirtioSnapshot,
}
/// Byte counts reported by the snapshot writers.
///
/// Derives `Debug`, `PartialEq` and `Eq` like the sibling
/// `SnapshotRestoreTimings` / `SnapshotRestoreOptions` types so the stats can
/// be logged and compared in tests.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct SnapshotWriteStats {
    /// Logical size of the RAM image, in bytes.
    pub ram_bytes: u64,
    /// Bytes the writer counted as data.
    pub ram_data_bytes: u64,
    /// Bytes the writer counted as zero (not written to the file).
    pub ram_zero_bytes: u64,
}
/// Wall-clock durations, in microseconds, of the individual restore phases as
/// measured by `restore_snapshot_timed_with_options`.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct SnapshotRestoreTimings {
// Time spent copying the RAM image into guest memory; stays 0 when the
// snapshot carries no RAM bytes.
pub ram_copy_us: u128,
// Time spent in `hvf::gic_state_restore`; stays 0 when the GIC blob is skipped.
pub gic_restore_us: u128,
// Time spent restoring the boot vCPU's registers.
pub vcpu_restore_us: u128,
// Time spent setting the recomputed vtimer offset.
pub vtimer_offset_us: u128,
}
/// Knobs for `restore_snapshot_timed_with_options`; the default restores everything.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct SnapshotRestoreOptions {
// When true, `hvf::gic_state_restore` is not called during restore.
pub skip_gic_blob: bool,
}
fn gp_reg_enum() -> Vec<av::hv_reg_t> {
let mut out = Vec::with_capacity(37);
let x0 = av::hv_reg_t::X0 as u32;
for i in 0..=30u32 {
out.push(unsafe { std::mem::transmute::<u32, av::hv_reg_t>(x0 + i) });
}
out.push(av::hv_reg_t::FP);
out.push(av::hv_reg_t::LR);
out.push(av::hv_reg_t::PC);
out.push(av::hv_reg_t::CPSR);
out.push(av::hv_reg_t::FPCR);
out.push(av::hv_reg_t::FPSR);
out
}
fn simd_reg_enum() -> Vec<av::hv_simd_fp_reg_t> {
let q0 = av::hv_simd_fp_reg_t::Q0 as u32;
(0..32u32)
.map(|i| unsafe { std::mem::transmute::<u32, av::hv_simd_fp_reg_t>(q0 + i) })
.collect()
}
fn sys_reg_enum() -> Vec<av::hv_sys_reg_t> {
use av::hv_sys_reg_t::*;
vec![
MPIDR_EL1,
SCTLR_EL1,
CPACR_EL1,
TCR_EL1,
TTBR0_EL1,
TTBR1_EL1,
MAIR_EL1,
AMAIR_EL1,
VBAR_EL1,
CONTEXTIDR_EL1,
TPIDR_EL1,
SPSR_EL1,
ELR_EL1,
SP_EL0,
SP_EL1,
ESR_EL1,
FAR_EL1,
PAR_EL1,
TPIDR_EL0,
TPIDRRO_EL0,
CNTKCTL_EL1,
CSSELR_EL1,
MDSCR_EL1,
APIAKEYLO_EL1,
APIAKEYHI_EL1,
APIBKEYLO_EL1,
APIBKEYHI_EL1,
APDAKEYLO_EL1,
APDAKEYHI_EL1,
APDBKEYLO_EL1,
APDBKEYHI_EL1,
APGAKEYLO_EL1,
APGAKEYHI_EL1,
CNTV_CTL_EL0,
CNTV_CVAL_EL0,
CNTP_CTL_EL0,
CNTP_CVAL_EL0,
]
}
fn icc_reg_enum() -> Vec<av::hv_gic_icc_reg_t> {
use av::hv_gic_icc_reg_t::*;
vec![
PMR_EL1,
BPR0_EL1,
BPR1_EL1,
AP0R0_EL1,
AP1R0_EL1,
RPR_EL1,
CTLR_EL1,
SRE_EL1,
IGRPEN0_EL1,
IGRPEN1_EL1,
]
}
/// Lists the GIC redistributor register offsets saved in a snapshot.
///
/// Returns five fixed offsets followed by eight consecutive 32-bit registers
/// starting at `0x10400`, 13 entries in total. The previous capacity hint of
/// 11 forced a reallocation.
///
/// NOTE(review): by the GICv3 SGI_base register map the fixed offsets look
/// like IGROUPR0, ISENABLER0, ICFGR1, ISPENDR0 and ISACTIVER0, and the run at
/// `0x10400` like IPRIORITYR0..7 — confirm against the GIC specification.
fn redist_reg_offsets() -> Vec<u32> {
    const FIXED: [u32; 5] = [0x10080, 0x10100, 0x10C04, 0x10200, 0x10300];
    const RUN_BASE: u32 = 0x10400;
    const RUN_WORDS: u32 = 8;

    let mut offsets = Vec::with_capacity(FIXED.len() + RUN_WORDS as usize);
    offsets.extend_from_slice(&FIXED);
    offsets.extend((0..RUN_WORDS).map(|n| RUN_BASE + 4 * n));
    offsets
}
// Binds `mach_absolute_time` from the macOS `System` framework.
#[link(name = "System", kind = "framework")]
extern "C" {
// Returns the host's monotonic Mach time counter. The snapshot code
// subtracts/adds the vCPU vtimer offset from this value directly, so it is
// treated as being in the same units as the guest counter —
// NOTE(review): confirm that assumption for the target hardware.
fn mach_absolute_time() -> u64;
}
/// Reads the register state of `vcpu` into a [`PerVcpuState`].
///
/// General-purpose registers, SIMD registers and the vtimer offset are
/// mandatory: the first read failure is returned. System, ICC and
/// redistributor registers are best-effort: a register that cannot be read is
/// left out of the result.
pub fn capture_vcpu_state(vcpu: &Vcpu) -> hvf::Result<PerVcpuState> {
    let gp_regs = gp_reg_enum()
        .into_iter()
        .map(|reg| vcpu.get_reg(reg).map(|val| (reg as u32, val)))
        .collect::<Result<Vec<_>, _>>()?;

    let simd_regs = simd_reg_enum()
        .into_iter()
        .map(|reg| vcpu.get_simd_fp_reg(reg).map(|val| (reg as u32, val)))
        .collect::<Result<Vec<_>, _>>()?;

    let sys_regs: Vec<(u32, u64)> = sys_reg_enum()
        .into_iter()
        .filter_map(|reg| vcpu.get_sys_reg(reg).ok().map(|val| (reg as u32, val)))
        .collect();

    let icc_regs: Vec<(u32, u64)> = icc_reg_enum()
        .into_iter()
        .filter_map(|reg| vcpu.get_icc_reg(reg).ok().map(|val| (reg as u32, val)))
        .collect();

    let redist_regs: Vec<(u32, u64)> = redist_reg_offsets()
        .into_iter()
        .filter_map(|off| {
            // SAFETY: assumes every listed offset is a valid
            // `hv_gic_redistributor_reg_t` discriminant —
            // NOTE(review): confirm against applevisor_sys.
            let reg: av::hv_gic_redistributor_reg_t = unsafe { std::mem::transmute(off) };
            vcpu.get_redist_reg(reg).ok().map(|val| (off, val))
        })
        .collect();

    let vtimer_offset = vcpu.get_vtimer_offset()?;

    Ok(PerVcpuState {
        gp_regs,
        simd_regs,
        sys_regs,
        icc_regs,
        redist_regs,
        vtimer_offset,
    })
}
/// Captures a full snapshot of `vm`: boot-vCPU registers, GIC state and a
/// complete copy of guest RAM, bundled with the caller-supplied virtio state.
///
/// # Errors
/// Returns the first error from reading the vCPU or the GIC state.
pub fn capture_snapshot(vm: &MicroVm, virtio: VirtioSnapshot) -> hvf::Result<Snapshot> {
    let boot_vcpu = capture_vcpu_state(&vm.vcpu)?;
    let gic_blob = hvf::gic_state_capture()?;

    let ram_len = vm.ram_size;
    let mut memory = vec![0u8; ram_len];
    // SAFETY: assumes `vm.ram_host` points to `vm.ram_size` readable bytes;
    // `memory` was just allocated with that length and cannot overlap it.
    unsafe { std::ptr::copy_nonoverlapping(vm.ram_host, memory.as_mut_ptr(), ram_len) };

    // The host time is sampled after the RAM copy, as in the original flow.
    let captured_mach_time = unsafe { mach_absolute_time() };
    let captured_cntvct = captured_mach_time.wrapping_sub(boot_vcpu.vtimer_offset);
    let ram_gpa = vm.ram_gpa;
    let per_vcpu = vec![boot_vcpu];

    Ok(Snapshot {
        captured_mach_time,
        captured_cntvct,
        ram_gpa,
        memory,
        gic_blob,
        per_vcpu,
        virtio,
    })
}
/// Writes the register state in `st` back into `vcpu`.
///
/// The order is: system registers, ICC registers (SRE first, group enables
/// last), SIMD registers, redistributor registers, virtual-timer fix-up and
/// finally the general-purpose registers.
///
/// # Errors
/// Fails on a rejected write to a "critical" system register (SCTLR, TCR,
/// TTBR0/1, MAIR, VBAR), any SIMD, redistributor or general-purpose register,
/// or the timer fix-up. Other system-register and all ICC write failures are
/// ignored.
///
/// NOTE(review): the raw ids in `st` are transmuted straight into the
/// applevisor enums; an id that is not a valid discriminant (e.g. from a
/// corrupted file) would be undefined behaviour — confirm ids are validated
/// before this point.
pub fn restore_vcpu_state(vcpu: &Vcpu, st: &PerVcpuState) -> hvf::Result<()> {
use av::hv_sys_reg_t as S;
// System registers whose restore failure aborts the whole restore.
let critical = |id: u32| {
let r: S = unsafe { std::mem::transmute(id) };
matches!(
r,
S::SCTLR_EL1 | S::TCR_EL1 | S::TTBR0_EL1 | S::TTBR1_EL1 | S::MAIR_EL1 | S::VBAR_EL1
)
};
// System registers, in captured order; non-critical failures are skipped.
for (id, v) in &st.sys_regs {
let r: S = unsafe { std::mem::transmute(*id) };
if let Err(e) = vcpu.set_sys_reg(r, *v) {
if critical(*id) {
return Err(e);
}
}
}
use av::hv_gic_icc_reg_t as I;
// Looks up the captured value of one ICC register, if it was captured.
let icc_find = |want: I| -> Option<u64> {
st.icc_regs.iter().find_map(|(id, v)| {
let r: I = unsafe { std::mem::transmute(*id) };
(r == want).then_some(*v)
})
};
// ICC restore: SRE first, then everything except the group enables...
if let Some(v) = icc_find(I::SRE_EL1) {
let _ = vcpu.set_icc_reg(I::SRE_EL1, v);
}
for (id, v) in &st.icc_regs {
let r: I = unsafe { std::mem::transmute(*id) };
match r {
I::SRE_EL1 | I::IGRPEN0_EL1 | I::IGRPEN1_EL1 => continue,
_ => {
let _ = vcpu.set_icc_reg(r, *v);
}
}
}
// ...and the group enables last.
if let Some(v) = icc_find(I::IGRPEN0_EL1) {
let _ = vcpu.set_icc_reg(I::IGRPEN0_EL1, v);
}
if let Some(v) = icc_find(I::IGRPEN1_EL1) {
let _ = vcpu.set_icc_reg(I::IGRPEN1_EL1, v);
}
use av::hv_simd_fp_reg_t as Q;
for (id, v) in &st.simd_regs {
let r: Q = unsafe { std::mem::transmute(*id) };
vcpu.set_simd_fp_reg(r, *v)?;
}
// Captured value of a redistributor register, or 0 if it was not captured.
let find_off = |off: u32| -> u64 {
st.redist_regs
.iter()
.find_map(|(o, v)| (*o == off).then_some(*v))
.unwrap_or(0)
};
let write_off = |off: u32, val: u64| -> hvf::Result<()> {
let r: av::hv_gic_redistributor_reg_t = unsafe { std::mem::transmute(off) };
vcpu.set_redist_reg(r, val)
};
// Redistributor restore: 0x10080, 0x10C04 and the eight registers at
// 0x10400 are written directly.
write_off(0x10080, find_off(0x10080))?; write_off(0x10C04, find_off(0x10C04))?; for n in 0..8u32 {
write_off(0x10400 + 4 * n, find_off(0x10400 + 4 * n))?;
}
// For 0x10100 / 0x10200 / 0x10300 an all-ones write goes to the register
// 0x80 above before the captured value is written — presumably the
// matching GICv3 "clear" register, so stale bits are dropped first
// (TODO confirm against the GIC specification).
write_off(0x10180, 0xFFFF_FFFF)?; write_off(0x10100, find_off(0x10100))?; write_off(0x10280, 0xFFFF_FFFF)?; write_off(0x10200, find_off(0x10200))?; write_off(0x10380, 0xFFFF_FFFF)?; write_off(0x10300, find_off(0x10300))?;
// Unmask the virtual timer; failure is ignored.
let _ = vcpu.set_vtimer_mask(false);
let cntv_ctl = st
.sys_regs
.iter()
.find_map(|(id, v)| {
let r: S = unsafe { std::mem::transmute(*id) };
(r == S::CNTV_CTL_EL0).then_some(*v)
})
.unwrap_or(0);
// Bit 0 of CNTV_CTL is read as "enable", bit 1 as "interrupt mask".
let enable = cntv_ctl & 1 != 0;
let imask = cntv_ctl & 2 != 0;
// If the guest timer was enabled and unmasked, set its compare value to 0
// and set bit 27 at 0x10200. This overrides the CNTV_CVAL_EL0 restored
// above — presumably to make the timer interrupt fire right after resume
// (TODO confirm).
if enable && !imask {
vcpu.set_sys_reg(S::CNTV_CVAL_EL0, 0)?;
write_off(0x10200, 1u64 << 27)?;
}
use av::hv_reg_t as R;
// General-purpose registers go last.
for (id, v) in &st.gp_regs {
let r: R = unsafe { std::mem::transmute(*id) };
vcpu.set_reg(r, *v)?;
}
Ok(())
}
/// Restores `snap` into `vm` with default options, discarding the timings.
pub fn restore_snapshot(vm: &MicroVm, snap: &Snapshot) -> hvf::Result<()> {
    let _timings = restore_snapshot_timed(vm, snap)?;
    Ok(())
}
/// Restores `snap` into `vm` with default options and reports how long each
/// phase took.
pub fn restore_snapshot_timed(
    vm: &MicroVm,
    snap: &Snapshot,
) -> hvf::Result<SnapshotRestoreTimings> {
    let default_options = SnapshotRestoreOptions::default();
    restore_snapshot_timed_with_options(vm, snap, default_options)
}
pub fn restore_snapshot_timed_with_options(
vm: &MicroVm,
snap: &Snapshot,
options: SnapshotRestoreOptions,
) -> hvf::Result<SnapshotRestoreTimings> {
let mut timings = SnapshotRestoreTimings::default();
if !snap.memory.is_empty() {
let t0 = std::time::Instant::now();
unsafe {
std::ptr::copy_nonoverlapping(
snap.memory.as_ptr(),
vm.ram_host,
vm.ram_size.min(snap.memory.len()),
);
}
timings.ram_copy_us = t0.elapsed().as_micros();
}
if !options.skip_gic_blob {
let t0 = std::time::Instant::now();
hvf::gic_state_restore(&snap.gic_blob)?;
timings.gic_restore_us = t0.elapsed().as_micros();
}
let boot_vcpu = snap.per_vcpu.first().ok_or(hvf::Error::Hv(-1))?;
let t0 = std::time::Instant::now();
restore_vcpu_state(&vm.vcpu, boot_vcpu)?;
timings.vcpu_restore_us = t0.elapsed().as_micros();
let now = unsafe { mach_absolute_time() };
let new_offset = now.wrapping_sub(snap.captured_cntvct);
let t0 = std::time::Instant::now();
vm.vcpu.set_vtimer_offset(new_offset)?;
timings.vtimer_offset_us = t0.elapsed().as_micros();
Ok(timings)
}
/// Errors produced while writing or reading a snapshot file.
#[derive(Debug)]
pub enum FileError {
// Underlying I/O failure (also produced by the `From<std::io::Error>` impl).
Io(std::io::Error),
// The first eight bytes of the file are not `SNAPSHOT_MAGIC`.
BadMagic,
// The version field differs from `SNAPSHOT_VERSION`; carries the value read.
BadVersion(u64),
// Structurally invalid content, with a static description.
Malformed(&'static str),
// The file (or a slice handed to the `le_*` decoders) is shorter than required.
Truncated,
}
impl From<std::io::Error> for FileError {
fn from(e: std::io::Error) -> Self {
Self::Io(e)
}
}
/// Decodes a little-endian `u16` from `bytes`, which must be exactly 2 bytes
/// long; any other length yields [`FileError::Truncated`].
fn le_u16(bytes: &[u8]) -> Result<u16, FileError> {
    match <[u8; 2]>::try_from(bytes) {
        Ok(raw) => Ok(u16::from_le_bytes(raw)),
        Err(_) => Err(FileError::Truncated),
    }
}
/// Decodes a little-endian `u32` from `bytes`, which must be exactly 4 bytes
/// long; any other length yields [`FileError::Truncated`].
fn le_u32(bytes: &[u8]) -> Result<u32, FileError> {
    match <[u8; 4]>::try_from(bytes) {
        Ok(raw) => Ok(u32::from_le_bytes(raw)),
        Err(_) => Err(FileError::Truncated),
    }
}
/// Decodes a little-endian `u64` from `bytes`, which must be exactly 8 bytes
/// long; any other length yields [`FileError::Truncated`].
fn le_u64(bytes: &[u8]) -> Result<u64, FileError> {
    match <[u8; 8]>::try_from(bytes) {
        Ok(raw) => Ok(u64::from_le_bytes(raw)),
        Err(_) => Err(FileError::Truncated),
    }
}
/// Decodes a little-endian `u128` from `bytes`, which must be exactly 16
/// bytes long; any other length yields [`FileError::Truncated`].
fn le_u128(bytes: &[u8]) -> Result<u128, FileError> {
    match <[u8; 16]>::try_from(bytes) {
        Ok(raw) => Ok(u128::from_le_bytes(raw)),
        Err(_) => Err(FileError::Truncated),
    }
}
/// Writes `snap` to `path`, discarding the write statistics.
pub fn save_to_file(path: &str, snap: &Snapshot) -> Result<(), FileError> {
    let _stats = save_to_file_with_stats(path, snap)?;
    Ok(())
}
/// A snapshot that keeps only the non-zero 4 KiB pages of guest RAM instead
/// of a full RAM copy.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub struct CompactSnapshot {
// Value of `mach_absolute_time()` sampled during capture.
pub captured_mach_time: u64,
// `captured_mach_time` minus the boot vCPU's vtimer offset (wrapping).
pub captured_cntvct: u64,
// Guest-physical base address of RAM.
pub ram_gpa: u64,
// Logical size of guest RAM in bytes, including the pages not stored.
pub ram_size: usize,
// Opaque GIC state returned by `hvf::gic_state_capture()`.
pub gic_blob: Vec<u8>,
// Per-vCPU register state; element 0 is the boot vCPU.
pub per_vcpu: Vec<PerVcpuState>,
// Virtio transport and vsock listener state.
pub virtio: VirtioSnapshot,
// Non-zero pages as `(byte offset into RAM, page contents)`. The capture
// helpers produce them in ascending offset order.
pub pages: Vec<(usize, Box<[u8; 4096]>)>,
}
/// Page granularity, in bytes, used by the compact snapshot code; must match
/// the `[u8; 4096]` page buffers stored in `CompactSnapshot::pages`.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
const COMPACT_PAGE_SIZE: usize = 4096;
/// Scans the first `n_pages` pages of `memory` on the calling thread and
/// returns a copy of every page that contains at least one non-zero byte,
/// keyed by its byte offset and in ascending offset order.
///
/// # Panics
/// Panics if `memory` is shorter than `n_pages * COMPACT_PAGE_SIZE` bytes.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn capture_compact_pages_serial(
    memory: &[u8],
    n_pages: usize,
) -> Vec<(usize, Box<[u8; 4096]>)> {
    // Capacity is a guess: roughly one page in twenty is expected to be kept.
    let mut kept: Vec<(usize, Box<[u8; 4096]>)> = Vec::with_capacity(n_pages / 20);
    let page_starts = (0..n_pages).map(|idx| idx * COMPACT_PAGE_SIZE);
    for start in page_starts {
        let source = &memory[start..start + COMPACT_PAGE_SIZE];
        if source.iter().any(|&byte| byte != 0) {
            let mut copy = Box::new([0u8; 4096]);
            copy.copy_from_slice(source);
            kept.push((start, copy));
        }
    }
    kept
}
/// Scans the first `n_pages` pages of `memory` on up to `n_threads` scoped
/// worker threads and returns a copy of every non-zero page, keyed by its
/// byte offset and in ascending offset order.
///
/// Changes from the previous version:
/// * A panicking worker is re-raised on the caller. Previously
///   `join().unwrap_or_default()` turned it into an empty result, silently
///   dropping that slab's pages from the snapshot.
/// * Workers borrow `memory` directly; scoped threads make the raw-pointer
///   round trip (and its `unsafe`) unnecessary.
/// * The worker count is clamped to `n_pages`, so an absurd `n_threads` no
///   longer sizes the handle vector.
///
/// # Panics
/// Panics if `memory` is shorter than `n_pages * COMPACT_PAGE_SIZE` bytes.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn capture_compact_pages_parallel(
    memory: &[u8],
    n_pages: usize,
    n_threads: usize,
) -> Vec<(usize, Box<[u8; 4096]>)> {
    if n_pages == 0 {
        return Vec::new();
    }
    let workers = n_threads.clamp(1, n_pages);
    let pages_per_slab = (n_pages + workers - 1) / workers;

    let slabs: Vec<Vec<(usize, Box<[u8; 4096]>)>> = std::thread::scope(|s| {
        // Spawn every worker before joining any of them.
        let handles: Vec<_> = (0..workers)
            .filter_map(|slab_idx| {
                let first = slab_idx * pages_per_slab;
                let last = (first + pages_per_slab).min(n_pages);
                if first >= last {
                    return None;
                }
                Some(s.spawn(move || {
                    let mut local: Vec<(usize, Box<[u8; 4096]>)> =
                        Vec::with_capacity((last - first) / 20);
                    for page_idx in first..last {
                        let off = page_idx * COMPACT_PAGE_SIZE;
                        let chunk = &memory[off..off + COMPACT_PAGE_SIZE];
                        if chunk.iter().any(|&b| b != 0) {
                            let mut page = Box::new([0u8; 4096]);
                            page.copy_from_slice(chunk);
                            local.push((off, page));
                        }
                    }
                    local
                }))
            })
            .collect();

        handles
            .into_iter()
            .map(|h| {
                // A lost slab would corrupt the snapshot: propagate the panic.
                h.join()
                    .unwrap_or_else(|payload| std::panic::resume_unwind(payload))
            })
            .collect()
    });

    // Slabs are contiguous and joined in order, so offsets stay ascending.
    slabs.into_iter().flatten().collect()
}
/// Captures a [`CompactSnapshot`] of `vm`: boot-vCPU registers (followed by
/// the caller-supplied `secondary_states`), GIC state, and a copy of every
/// non-zero 4 KiB page of guest RAM.
///
/// # Errors
/// Returns `SnapshotStreamError::Hvf` if reading the vCPU or GIC state fails.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn capture_compact(
    vm: &MicroVm,
    virtio: VirtioSnapshot,
    secondary_states: Vec<PerVcpuState>,
) -> Result<CompactSnapshot, SnapshotStreamError> {
    let boot_vcpu = capture_vcpu_state(&vm.vcpu).map_err(SnapshotStreamError::Hvf)?;
    let gic_blob = hvf::gic_state_capture().map_err(SnapshotStreamError::Hvf)?;

    let captured_mach_time = unsafe { mach_absolute_time() };
    let captured_cntvct = captured_mach_time.wrapping_sub(boot_vcpu.vtimer_offset);

    // Boot vCPU first, secondaries after it in the order given.
    let per_vcpu: Vec<PerVcpuState> = std::iter::once(boot_vcpu)
        .chain(secondary_states)
        .collect();

    let ram_size = vm.ram_size;
    // SAFETY: assumes `vm.ram_host` points to `ram_size` readable bytes for
    // the duration of the scan.
    let memory: &[u8] = unsafe { std::slice::from_raw_parts(vm.ram_host, ram_size) };
    let n_pages = ram_size / COMPACT_PAGE_SIZE;

    let pages = match snapshot_write_threads() {
        0 | 1 => capture_compact_pages_serial(memory, n_pages),
        n_threads => capture_compact_pages_parallel(memory, n_pages, n_threads),
    };

    Ok(CompactSnapshot {
        captured_mach_time,
        captured_cntvct,
        ram_gpa: vm.ram_gpa,
        ram_size,
        gic_blob,
        per_vcpu,
        virtio,
        pages,
    })
}
/// Serialises everything in `snap` except the RAM pages into the on-disk
/// metadata layout (all integers little-endian):
///
/// 1. 72-byte header: magic, version, mach time, cntvct, RAM size, GIC blob
///    length, vCPU count (`u32`), 4 bytes of padding, RAM guest-physical
///    address, and `ram_offset_for_header` at byte 64.
/// 2. The GIC blob.
/// 3. Per vCPU: vtimer offset, five `u32` list lengths, then the gp, simd,
///    sys, icc and redistributor register lists.
/// 4. Virtio-mmio transports and their queues, then the vsock listeners.
///
/// Changes from the previous version: the duplicated `#[cfg]` attribute is
/// gone, the capacity hint uses the real per-vCPU size (the old 256 bytes per
/// vCPU was less than the general-purpose list alone), and the four identical
/// `(u32, u64)` list encoders share one helper. The bytes produced are
/// unchanged.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn encode_compact_meta(snap: &CompactSnapshot, ram_offset_for_header: u64) -> Vec<u8> {
    /// Appends a `(u32 id, u64 value)` register list.
    fn put_regs64(buf: &mut Vec<u8>, regs: &[(u32, u64)]) {
        for (id, value) in regs {
            buf.extend_from_slice(&id.to_le_bytes());
            buf.extend_from_slice(&value.to_le_bytes());
        }
    }

    // 28 bytes of per-vCPU header, 12 bytes per 64-bit register entry and
    // 20 bytes per 128-bit SIMD entry.
    let vcpu_bytes: usize = snap
        .per_vcpu
        .iter()
        .map(|st| {
            let regs64 =
                st.gp_regs.len() + st.sys_regs.len() + st.icc_regs.len() + st.redist_regs.len();
            28 + 12 * regs64 + 20 * st.simd_regs.len()
        })
        .sum();
    let mut buf = Vec::with_capacity(
        72 + snap.gic_blob.len()
            + vcpu_bytes
            + snap.virtio.mmio.len() * 256
            + snap.virtio.vsock_listeners.len() * 24,
    );

    // Fixed header.
    buf.extend_from_slice(&SNAPSHOT_MAGIC);
    buf.extend_from_slice(&SNAPSHOT_VERSION.to_le_bytes());
    buf.extend_from_slice(&snap.captured_mach_time.to_le_bytes());
    buf.extend_from_slice(&snap.captured_cntvct.to_le_bytes());
    buf.extend_from_slice(&(snap.ram_size as u64).to_le_bytes());
    buf.extend_from_slice(&(snap.gic_blob.len() as u64).to_le_bytes());
    buf.extend_from_slice(&(snap.per_vcpu.len() as u32).to_le_bytes());
    buf.extend_from_slice(&0u32.to_le_bytes());
    buf.extend_from_slice(&snap.ram_gpa.to_le_bytes());
    buf.extend_from_slice(&ram_offset_for_header.to_le_bytes());

    buf.extend_from_slice(&snap.gic_blob);

    // Per-vCPU register state.
    for st in &snap.per_vcpu {
        buf.extend_from_slice(&st.vtimer_offset.to_le_bytes());
        let counts = [
            st.gp_regs.len(),
            st.simd_regs.len(),
            st.sys_regs.len(),
            st.icc_regs.len(),
            st.redist_regs.len(),
        ];
        for count in counts {
            buf.extend_from_slice(&(count as u32).to_le_bytes());
        }
        put_regs64(&mut buf, &st.gp_regs);
        for (id, value) in &st.simd_regs {
            buf.extend_from_slice(&id.to_le_bytes());
            buf.extend_from_slice(&value.to_le_bytes());
        }
        put_regs64(&mut buf, &st.sys_regs);
        put_regs64(&mut buf, &st.icc_regs);
        put_regs64(&mut buf, &st.redist_regs);
    }

    // Virtio-mmio transports.
    buf.extend_from_slice(&(snap.virtio.mmio.len() as u32).to_le_bytes());
    for m in &snap.virtio.mmio {
        buf.extend_from_slice(&m.driver_features[0].to_le_bytes());
        buf.extend_from_slice(&m.driver_features[1].to_le_bytes());
        buf.extend_from_slice(&m.status.to_le_bytes());
        buf.extend_from_slice(&m.interrupt_status.to_le_bytes());
        buf.extend_from_slice(&(m.queues.len() as u32).to_le_bytes());
        for q in &m.queues {
            buf.extend_from_slice(&q.size.to_le_bytes());
            buf.push(if q.ready { 1 } else { 0 });
            // One padding byte after the ready flag.
            buf.push(0);
            buf.extend_from_slice(&q.desc_table.to_le_bytes());
            buf.extend_from_slice(&q.avail_ring.to_le_bytes());
            buf.extend_from_slice(&q.used_ring.to_le_bytes());
            buf.extend_from_slice(&q.last_avail_idx.to_le_bytes());
            buf.extend_from_slice(&q.next_used_idx.to_le_bytes());
        }
    }

    // vsock listeners.
    buf.extend_from_slice(&(snap.virtio.vsock_listeners.len() as u32).to_le_bytes());
    for l in &snap.virtio.vsock_listeners {
        buf.extend_from_slice(&l.cid.to_le_bytes());
        buf.extend_from_slice(&l.peer_port.to_le_bytes());
        buf.extend_from_slice(&l.vm_port.to_le_bytes());
        buf.extend_from_slice(&l.family.to_le_bytes());
        buf.extend_from_slice(&l.socktype.to_le_bytes());
    }
    buf
}
pub fn save_compact_to_file(
snap: &CompactSnapshot,
path: &str,
) -> Result<SnapshotWriteStats, FileError> {
use std::io::{Seek, SeekFrom, Write};
let partial = format!("{path}.partial");
let mut f = std::fs::File::create(&partial)?;
let meta = encode_compact_meta(snap, 0);
f.write_all(&meta)?;
let cur = f.stream_position()? as u64;
let pad = (RAM_PAGE_ALIGN - (cur % RAM_PAGE_ALIGN)) % RAM_PAGE_ALIGN;
if pad > 0 {
f.write_all(&vec![0u8; pad as usize])?;
}
let ram_offset = f.stream_position()? as u64;
f.set_len(ram_offset + snap.ram_size as u64)?;
let n_threads = snapshot_write_threads();
let data_bytes = if n_threads <= 1 || snap.pages.len() < 64 {
let mut data_bytes = 0u64;
for (page_off, page) in &snap.pages {
f.seek(SeekFrom::Start(ram_offset + *page_off as u64))?;
f.write_all(page.as_ref())?;
data_bytes += COMPACT_PAGE_SIZE as u64;
}
data_bytes
} else {
save_compact_pages_parallel(&f, &snap.pages, ram_offset, n_threads)?
};
f.seek(SeekFrom::Start(64))?;
f.write_all(&ram_offset.to_le_bytes())?;
drop(f);
std::fs::rename(&partial, path)?;
Ok(SnapshotWriteStats {
ram_bytes: snap.ram_size as u64,
ram_data_bytes: data_bytes,
ram_zero_bytes: snap.ram_size as u64 - data_bytes,
})
}
/// Writes `pages` into `f` at `ram_offset + page offset` using up to
/// `n_threads` scoped worker threads and positional writes, and returns the
/// number of bytes written.
///
/// Changes from the previous version:
/// * An empty `pages` slice returns `Ok(0)`; it used to panic in `chunks(0)`.
/// * Workers share `f` by reference. `write_all_at` takes `&self`, so the
///   per-worker `try_clone` (one extra file descriptor each) was unnecessary.
///
/// # Errors
/// Returns `FileError::Io` if a write fails or a worker panics.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn save_compact_pages_parallel(
    f: &std::fs::File,
    pages: &[(usize, Box<[u8; 4096]>)],
    ram_offset: u64,
    n_threads: usize,
) -> Result<u64, FileError> {
    use std::os::unix::fs::FileExt;

    if pages.is_empty() {
        return Ok(0);
    }
    let workers = n_threads.max(1);
    // At least 1 here because `pages` is non-empty.
    let per_thread = (pages.len() + workers - 1) / workers;

    let total_data = std::thread::scope(|s| -> std::io::Result<u64> {
        let handles: Vec<_> = pages
            .chunks(per_thread)
            .map(|chunk| {
                s.spawn(move || -> std::io::Result<u64> {
                    let mut bytes = 0u64;
                    for (page_off, page) in chunk {
                        f.write_all_at(&page[..], ram_offset + *page_off as u64)?;
                        bytes += COMPACT_PAGE_SIZE as u64;
                    }
                    Ok(bytes)
                })
            })
            .collect();

        let mut total = 0u64;
        for h in handles {
            total += h
                .join()
                .map_err(|_| std::io::Error::other("snapshot write thread panicked"))??;
        }
        Ok(total)
    })?;
    Ok(total_data)
}
/// Clones the file at `src` to `dst` with `libc::clonefile` (flags = 0).
///
/// # Errors
/// Returns `InvalidInput` if either path contains a NUL byte, otherwise the
/// OS error reported by `clonefile`.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn clonefile_via_libc(src: &str, dst: &str) -> std::io::Result<()> {
    use std::ffi::CString;
    use std::io::{Error, ErrorKind};

    let to_c_path = |path: &str, complaint: &'static str| {
        CString::new(path).map_err(|_| Error::new(ErrorKind::InvalidInput, complaint))
    };
    let src_c = to_c_path(src, "src path contains NUL byte")?;
    let dst_c = to_c_path(dst, "dst path contains NUL byte")?;

    // SAFETY: both pointers come from live, NUL-terminated `CString`s.
    match unsafe { libc::clonefile(src_c.as_ptr(), dst_c.as_ptr(), 0) } {
        0 => Ok(()),
        _ => Err(Error::last_os_error()),
    }
}
/// Loads the snapshot at `path` as a [`CompactSnapshot`]: metadata via
/// `load_meta`, then the RAM section is memory-mapped read-only and every
/// non-zero 4 KiB page is copied out.
///
/// Changes from the previous version:
/// * The file length is checked against `ram_offset + ram_size` before
///   mapping. Touching a mapping past end-of-file raises SIGBUS, so a
///   truncated file used to crash the process; it now yields
///   `FileError::Truncated`.
/// * A zero-length RAM section returns no pages instead of failing in `mmap`.
/// * The page scan reuses `capture_compact_pages_serial` instead of a copy of
///   its loop.
///
/// # Errors
/// Propagates `load_meta` errors. Also returns `FileError::Malformed` for an
/// offset that cannot be represented, `FileError::Truncated` for a short
/// file, and `FileError::Io` for open/stat/mmap failures.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn load_compact_from_file(path: &str) -> Result<CompactSnapshot, FileError> {
    use std::os::fd::AsRawFd;

    let (snap, ram_offset, memory_bytes) = load_meta(path)?;
    let f = std::fs::File::open(path)?;

    let ram_end = ram_offset
        .checked_add(memory_bytes as u64)
        .ok_or(FileError::Malformed("RAM section end overflows u64"))?;
    if f.metadata()?.len() < ram_end {
        return Err(FileError::Truncated);
    }
    let map_offset = libc::off_t::try_from(ram_offset)
        .map_err(|_| FileError::Malformed("RAM offset does not fit in off_t"))?;

    let pages = if memory_bytes == 0 {
        Vec::new()
    } else {
        // SAFETY: `f` is an open, readable file and the requested range was
        // checked above to lie inside it.
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                memory_bytes,
                libc::PROT_READ,
                libc::MAP_PRIVATE,
                f.as_raw_fd(),
                map_offset,
            )
        };
        if ptr == libc::MAP_FAILED {
            return Err(FileError::Io(std::io::Error::last_os_error()));
        }
        // The scan below is one forward pass; the hint is best effort.
        unsafe {
            let _ = libc::madvise(ptr, memory_bytes, libc::MADV_SEQUENTIAL);
        }
        // SAFETY: the mapping is `memory_bytes` long and stays mapped until
        // the `munmap` below; `memory` is not used after that.
        let memory: &[u8] =
            unsafe { std::slice::from_raw_parts(ptr as *const u8, memory_bytes) };
        let pages = capture_compact_pages_serial(memory, memory_bytes / COMPACT_PAGE_SIZE);
        unsafe {
            libc::munmap(ptr, memory_bytes);
        }
        pages
    };

    Ok(CompactSnapshot {
        captured_mach_time: snap.captured_mach_time,
        captured_cntvct: snap.captured_cntvct,
        ram_gpa: snap.ram_gpa,
        ram_size: memory_bytes,
        gic_blob: snap.gic_blob,
        per_vcpu: snap.per_vcpu,
        virtio: snap.virtio,
        pages,
    })
}
/// Writes `snap` to `out_path` as a diff against an existing base snapshot.
///
/// The base file at `base_path` is cloned with `clonefile` to
/// `<out_path>.partial`, then patched in place: the metadata region is
/// rewritten from `snap`, pages that are new or differ from `base` are
/// written, and pages present in `base` but absent from `snap` are
/// hole-punched (or overwritten with zeros if punching fails). The result is
/// renamed over `out_path`.
///
/// # Errors
/// Returns `FileError::Io` if the clone, any read/write, or the rename fails,
/// if the base file's RAM size differs from `snap.ram_size`, or if the new
/// metadata does not fit in front of the base file's RAM offset.
///
/// NOTE(review): only the two explicit validation failures remove the
/// `.partial` file; any other error path leaves it on disk.
/// NOTE(review): the returned `ram_data_bytes` is bytes written PLUS bytes
/// hole-punched; zero-fill fallback writes are counted in neither figure.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn save_compact_to_file_via_clone(
snap: &CompactSnapshot,
base: &CompactSnapshot,
base_path: &str,
out_path: &str,
) -> Result<SnapshotWriteStats, FileError> {
use std::collections::{HashMap, HashSet};
use std::os::unix::fs::FileExt;
let partial = format!("{out_path}.partial");
// Remove any stale partial first; the error from a missing file is ignored.
let _ = std::fs::remove_file(&partial);
clonefile_via_libc(base_path, &partial).map_err(FileError::Io)?;
let f = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(&partial)?;
// Header field at byte 64: offset of the RAM section in the base file.
let mut ram_off_bytes = [0u8; 8];
f.read_exact_at(&mut ram_off_bytes, 64)?;
let ram_offset = u64::from_le_bytes(ram_off_bytes);
// Header field at byte 32: RAM size recorded in the base file.
let mut base_ram_size_bytes = [0u8; 8];
f.read_exact_at(&mut base_ram_size_bytes, 32)?;
let base_ram_size = u64::from_le_bytes(base_ram_size_bytes);
if base_ram_size != snap.ram_size as u64 {
let _ = std::fs::remove_file(&partial);
return Err(FileError::Io(std::io::Error::other(format!(
"diff snapshot: base ram_size {base_ram_size} != warm ram_size {}",
snap.ram_size
))));
}
// The RAM section keeps its base offset, so the new metadata must fit in
// the space in front of it.
let meta = encode_compact_meta(snap, ram_offset);
if (meta.len() as u64) > ram_offset {
let _ = std::fs::remove_file(&partial);
return Err(FileError::Io(std::io::Error::other(format!(
"diff snapshot: warm meta {} bytes overflows base ram_offset {ram_offset}",
meta.len()
))));
}
// Overwrite the whole metadata region, zero-padded up to the RAM offset.
let mut padded = meta;
padded.resize(ram_offset as usize, 0);
f.write_all_at(&padded, 0)?;
let base_lookup: HashMap<usize, &[u8; 4096]> = base
.pages
.iter()
.map(|(o, p)| (*o, p.as_ref()))
.collect();
let warm_offsets: HashSet<usize> = snap.pages.iter().map(|(o, _)| *o).collect();
// Pages to write: absent from the base, or present with different content.
let mut data_pages: Vec<(usize, &[u8; 4096])> = Vec::new();
for (off, warm_p) in &snap.pages {
let warm_bytes: &[u8; 4096] = warm_p.as_ref();
match base_lookup.get(off) {
None => data_pages.push((*off, warm_bytes)),
Some(b) if *b != warm_bytes => data_pages.push((*off, warm_bytes)),
_ => {}
}
}
// Pages to clear: stored in the base but not in `snap` (now all-zero).
let mut zero_offsets: Vec<usize> = Vec::new();
for (off, _) in &base.pages {
if !warm_offsets.contains(off) {
zero_offsets.push(*off);
}
}
// Sorted so that adjacent pages can be merged into runs below.
zero_offsets.sort_unstable();
let n_threads = snapshot_write_threads();
let data_bytes_written = if n_threads <= 1 || data_pages.len() < 64 {
let mut bytes = 0u64;
for (off, page) in &data_pages {
f.write_all_at(*page, ram_offset + *off as u64)?;
bytes += COMPACT_PAGE_SIZE as u64;
}
bytes
} else {
save_diff_pages_parallel(&f, &data_pages, ram_offset, n_threads)?
};
let mut hole_bytes = 0u64;
if !zero_offsets.is_empty() {
use std::os::unix::io::AsRawFd;
let fd = f.as_raw_fd();
let mut i = 0;
while i < zero_offsets.len() {
// Merge consecutive page offsets into one [run_start, run_end) span.
let run_start = zero_offsets[i];
let mut run_end = run_start + COMPACT_PAGE_SIZE;
i += 1;
while i < zero_offsets.len() && zero_offsets[i] == run_end {
run_end += COMPACT_PAGE_SIZE;
i += 1;
}
let span = (run_end - run_start) as i64;
let punch = libc::fpunchhole_t {
fp_flags: 0,
reserved: 0,
fp_offset: (ram_offset + run_start as u64) as libc::off_t,
fp_length: span,
};
let r = unsafe {
libc::fcntl(
fd,
libc::F_PUNCHHOLE,
&punch as *const libc::fpunchhole_t,
)
};
if r == 0 {
hole_bytes += span as u64;
} else {
// Hole punching failed: overwrite the span with explicit zeros instead.
let zeros = vec![0u8; span as usize];
f.write_all_at(&zeros, ram_offset + run_start as u64)?;
}
}
}
let data_bytes = data_bytes_written + hole_bytes;
// Close the file before renaming it into place.
drop(f);
std::fs::rename(&partial, out_path)?;
Ok(SnapshotWriteStats {
ram_bytes: snap.ram_size as u64,
ram_data_bytes: data_bytes,
ram_zero_bytes: snap.ram_size as u64 - data_bytes,
})
}
/// Writes the borrowed diff `pages` into `f` at `ram_offset + page offset`
/// using up to `n_threads` scoped worker threads and positional writes, and
/// returns the number of bytes written.
///
/// Changes from the previous version:
/// * An empty `pages` slice returns `Ok(0)`; it used to panic in `chunks(0)`.
/// * Workers share `f` by reference. `write_all_at` takes `&self`, so the
///   per-worker `try_clone` (one extra file descriptor each) was unnecessary.
///
/// # Errors
/// Returns `FileError::Io` if a write fails or a worker panics.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn save_diff_pages_parallel(
    f: &std::fs::File,
    pages: &[(usize, &[u8; 4096])],
    ram_offset: u64,
    n_threads: usize,
) -> Result<u64, FileError> {
    use std::os::unix::fs::FileExt;

    if pages.is_empty() {
        return Ok(0);
    }
    let workers = n_threads.max(1);
    // At least 1 here because `pages` is non-empty.
    let per_thread = (pages.len() + workers - 1) / workers;

    let total = std::thread::scope(|s| -> std::io::Result<u64> {
        let handles: Vec<_> = pages
            .chunks(per_thread)
            .map(|chunk| {
                s.spawn(move || -> std::io::Result<u64> {
                    let mut bytes = 0u64;
                    for (off, page) in chunk {
                        f.write_all_at(&page[..], ram_offset + *off as u64)?;
                        bytes += COMPACT_PAGE_SIZE as u64;
                    }
                    Ok(bytes)
                })
            })
            .collect();

        let mut total = 0u64;
        for h in handles {
            total += h
                .join()
                .map_err(|_| std::io::Error::other("snapshot diff write thread panicked"))??;
        }
        Ok(total)
    })?;
    Ok(total)
}
/// Captures the state of `vm` and writes it straight to `path`, reading guest
/// RAM in place instead of copying it into a `Snapshot` first.
///
/// # Errors
/// Returns `SnapshotStreamError::Hvf` if reading vCPU or GIC state fails and
/// `SnapshotStreamError::Io` if writing the file fails.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn capture_and_save_streaming(
    vm: &MicroVm,
    virtio: &VirtioSnapshot,
    secondary_states: &[PerVcpuState],
    path: &str,
) -> Result<SnapshotWriteStats, SnapshotStreamError> {
    let boot_vcpu = capture_vcpu_state(&vm.vcpu).map_err(SnapshotStreamError::Hvf)?;
    let gic_blob = hvf::gic_state_capture().map_err(SnapshotStreamError::Hvf)?;

    let captured_mach_time = unsafe { mach_absolute_time() };
    let captured_cntvct = captured_mach_time.wrapping_sub(boot_vcpu.vtimer_offset);

    // Boot vCPU first, then clones of the caller's secondary states.
    let per_vcpu: Vec<PerVcpuState> = std::iter::once(boot_vcpu)
        .chain(secondary_states.iter().cloned())
        .collect();

    // SAFETY: assumes `vm.ram_host` points to `vm.ram_size` readable bytes
    // for the duration of the write.
    let memory: &[u8] = unsafe { std::slice::from_raw_parts(vm.ram_host, vm.ram_size) };

    let written = write_snapshot_file(
        path,
        captured_mach_time,
        captured_cntvct,
        vm.ram_gpa,
        memory,
        &gic_blob,
        &per_vcpu,
        virtio,
    );
    written.map_err(SnapshotStreamError::Io)
}
/// Error type of the capture-and-write entry points.
#[derive(Debug)]
pub enum SnapshotStreamError {
// Reading vCPU or GIC state from the hypervisor failed.
Hvf(hvf::Error),
// Writing the snapshot file failed.
Io(FileError),
}
impl std::fmt::Display for SnapshotStreamError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Hvf(e) => write!(f, "snapshot capture: {e:?}"),
Self::Io(e) => write!(f, "snapshot save: {e:?}"),
}
}
}
pub fn save_to_file_with_stats(
path: &str,
snap: &Snapshot,
) -> Result<SnapshotWriteStats, FileError> {
write_snapshot_file(
path,
snap.captured_mach_time,
snap.captured_cntvct,
snap.ram_gpa,
&snap.memory,
&snap.gic_blob,
&snap.per_vcpu,
&snap.virtio,
)
}
#[allow(clippy::too_many_arguments)]
fn write_snapshot_file(
path: &str,
captured_mach_time: u64,
captured_cntvct: u64,
ram_gpa: u64,
memory: &[u8],
gic_blob: &[u8],
per_vcpu: &[PerVcpuState],
virtio: &VirtioSnapshot,
) -> Result<SnapshotWriteStats, FileError> {
use std::io::{Seek, SeekFrom};
let partial = format!("{path}.partial");
let mut f = std::fs::File::create(&partial)?;
let mut hdr = Vec::with_capacity(72);
hdr.extend_from_slice(&SNAPSHOT_MAGIC);
hdr.extend_from_slice(&SNAPSHOT_VERSION.to_le_bytes());
hdr.extend_from_slice(&captured_mach_time.to_le_bytes());
hdr.extend_from_slice(&captured_cntvct.to_le_bytes());
hdr.extend_from_slice(&(memory.len() as u64).to_le_bytes());
hdr.extend_from_slice(&(gic_blob.len() as u64).to_le_bytes());
hdr.extend_from_slice(&(per_vcpu.len() as u32).to_le_bytes());
hdr.extend_from_slice(&0u32.to_le_bytes()); hdr.extend_from_slice(&ram_gpa.to_le_bytes());
hdr.extend_from_slice(&0u64.to_le_bytes()); f.write_all(&hdr)?;
f.write_all(gic_blob)?;
for st in per_vcpu {
let mut p = Vec::with_capacity(64 + st.gp_regs.len() * 12 + st.simd_regs.len() * 20);
p.extend_from_slice(&st.vtimer_offset.to_le_bytes());
p.extend_from_slice(&(st.gp_regs.len() as u32).to_le_bytes());
p.extend_from_slice(&(st.simd_regs.len() as u32).to_le_bytes());
p.extend_from_slice(&(st.sys_regs.len() as u32).to_le_bytes());
p.extend_from_slice(&(st.icc_regs.len() as u32).to_le_bytes());
p.extend_from_slice(&(st.redist_regs.len() as u32).to_le_bytes());
for (id, v) in &st.gp_regs {
p.extend_from_slice(&id.to_le_bytes());
p.extend_from_slice(&v.to_le_bytes());
}
for (id, v) in &st.simd_regs {
p.extend_from_slice(&id.to_le_bytes());
p.extend_from_slice(&v.to_le_bytes());
}
for (id, v) in &st.sys_regs {
p.extend_from_slice(&id.to_le_bytes());
p.extend_from_slice(&v.to_le_bytes());
}
for (id, v) in &st.icc_regs {
p.extend_from_slice(&id.to_le_bytes());
p.extend_from_slice(&v.to_le_bytes());
}
for (off, v) in &st.redist_regs {
p.extend_from_slice(&off.to_le_bytes());
p.extend_from_slice(&v.to_le_bytes());
}
f.write_all(&p)?;
}
let mut v = Vec::with_capacity(
64 + virtio.mmio.len() * 256 + virtio.vsock_listeners.len() * 24,
);
v.extend_from_slice(&(virtio.mmio.len() as u32).to_le_bytes());
for m in &virtio.mmio {
v.extend_from_slice(&m.driver_features[0].to_le_bytes());
v.extend_from_slice(&m.driver_features[1].to_le_bytes());
v.extend_from_slice(&m.status.to_le_bytes());
v.extend_from_slice(&m.interrupt_status.to_le_bytes());
v.extend_from_slice(&(m.queues.len() as u32).to_le_bytes());
for q in &m.queues {
v.extend_from_slice(&q.size.to_le_bytes());
v.push(if q.ready { 1 } else { 0 });
v.push(0); v.extend_from_slice(&q.desc_table.to_le_bytes());
v.extend_from_slice(&q.avail_ring.to_le_bytes());
v.extend_from_slice(&q.used_ring.to_le_bytes());
v.extend_from_slice(&q.last_avail_idx.to_le_bytes());
v.extend_from_slice(&q.next_used_idx.to_le_bytes());
}
}
v.extend_from_slice(&(virtio.vsock_listeners.len() as u32).to_le_bytes());
for l in &virtio.vsock_listeners {
v.extend_from_slice(&l.cid.to_le_bytes());
v.extend_from_slice(&l.peer_port.to_le_bytes());
v.extend_from_slice(&l.vm_port.to_le_bytes());
v.extend_from_slice(&l.family.to_le_bytes());
v.extend_from_slice(&l.socktype.to_le_bytes());
}
f.write_all(&v)?;
let cur = f.stream_position()? as u64;
let pad = (RAM_PAGE_ALIGN - (cur % RAM_PAGE_ALIGN)) % RAM_PAGE_ALIGN;
if pad > 0 {
f.write_all(&vec![0u8; pad as usize])?;
}
let ram_offset = f.stream_position()? as u64;
let stats = write_sparse_ram(&mut f, memory, ram_offset)?;
f.seek(SeekFrom::Start(64))?;
f.write_all(&ram_offset.to_le_bytes())?;
drop(f);
std::fs::rename(&partial, path)?;
Ok(stats)
}
/// Number of worker threads the snapshot writers should use.
///
/// Reads `SUPERMACHINE_SNAPSHOT_WRITE_THREADS`: a value that parses as
/// `usize` is used, raised to at least 1. If the variable is unset or does
/// not parse, the default is 4.
fn snapshot_write_threads() -> usize {
    std::env::var("SUPERMACHINE_SNAPSHOT_WRITE_THREADS")
        .ok()
        .and_then(|raw| raw.parse::<usize>().ok())
        .map_or(4, |requested| requested.max(1))
}
/// Writes `memory` as the RAM section of `f`, starting at `ram_offset`.
///
/// The file is first extended to cover the whole section, so chunks that are
/// skipped as all-zero remain holes. On return the file cursor sits at the
/// end of the RAM section.
fn write_sparse_ram(
    f: &mut std::fs::File,
    memory: &[u8],
    ram_offset: u64,
) -> Result<SnapshotWriteStats, FileError> {
    use std::io::{Seek, SeekFrom};

    let workers = snapshot_write_threads();
    let section_end = ram_offset + memory.len() as u64;
    f.set_len(section_end)?;

    let stats = match workers {
        0 | 1 => write_sparse_ram_serial(f, memory, ram_offset)?,
        n => write_sparse_ram_parallel(f, memory, ram_offset, n)?,
    };

    // The writers use positional I/O, so move the cursor explicitly.
    f.seek(SeekFrom::Start(section_end))?;
    Ok(stats)
}
fn write_sparse_ram_serial(
f: &std::fs::File,
memory: &[u8],
ram_offset: u64,
) -> Result<SnapshotWriteStats, FileError> {
use std::os::unix::fs::FileExt;
let mut pos = 0usize;
let mut data_bytes = 0u64;
let mut zero_bytes = 0u64;
while pos < memory.len() {
let run_is_zero =
is_zero_chunk(&memory[pos..(pos + (memory.len() - pos).min(SPARSE_RAM_CHUNK))]);
let run_start = pos;
pos += (memory.len() - pos).min(SPARSE_RAM_CHUNK);
while pos < memory.len() {
let next_len = (memory.len() - pos).min(SPARSE_RAM_CHUNK);
let next_is_zero = is_zero_chunk(&memory[pos..pos + next_len]);
if next_is_zero != run_is_zero {
break;
}
pos += next_len;
}
let run_len = pos - run_start;
if run_is_zero {
zero_bytes += run_len as u64;
} else {
f.write_all_at(&memory[run_start..pos], ram_offset + run_start as u64)?;
data_bytes += run_len as u64;
}
}
Ok(SnapshotWriteStats {
ram_bytes: memory.len() as u64,
ram_data_bytes: data_bytes,
ram_zero_bytes: zero_bytes,
})
}
/// Writes `memory` sparsely into `f` at `ram_offset`, splitting the image
/// into up to `n_threads` slabs (each a multiple of `SPARSE_RAM_CHUNK`) that
/// scoped worker threads write concurrently with positional I/O.
///
/// Changes from the previous version:
/// * `n_threads == 0` is treated as 1 (as the sibling parallel helpers do)
///   instead of dividing by zero.
/// * Workers borrow `memory` and `f` directly; the raw-pointer slice rebuild
///   (`unsafe`) and the per-worker `try_clone` were unnecessary.
///
/// # Errors
/// Returns `FileError::Io` if a write fails or a worker panics.
fn write_sparse_ram_parallel(
    f: &std::fs::File,
    memory: &[u8],
    ram_offset: u64,
    n_threads: usize,
) -> Result<SnapshotWriteStats, FileError> {
    let total = memory.len();
    let workers = n_threads.max(1);
    // Per-worker share, rounded up to whole chunks so every slab starts on a
    // chunk boundary.
    let share = (total + workers - 1) / workers;
    let slab_len = (share + SPARSE_RAM_CHUNK - 1) / SPARSE_RAM_CHUNK * SPARSE_RAM_CHUNK;

    let (data_bytes, zero_bytes) = if slab_len == 0 {
        // Empty image: nothing to write (and `chunks(0)` would panic).
        (0, 0)
    } else {
        std::thread::scope(|s| -> std::io::Result<(u64, u64)> {
            let handles: Vec<_> = memory
                .chunks(slab_len)
                .enumerate()
                .map(|(idx, slab)| {
                    let base = ram_offset + (idx * slab_len) as u64;
                    s.spawn(move || write_sparse_ram_slab(f, slab, base))
                })
                .collect();

            let mut data = 0u64;
            let mut zero = 0u64;
            for h in handles {
                let (d, z) = h
                    .join()
                    .map_err(|_| std::io::Error::other("snapshot write thread panicked"))??;
                data += d;
                zero += z;
            }
            Ok((data, zero))
        })?
    };

    Ok(SnapshotWriteStats {
        ram_bytes: total as u64,
        ram_data_bytes: data_bytes,
        ram_zero_bytes: zero_bytes,
    })
}
/// Writes one slab of guest memory to `f`, skipping zero runs.
///
/// `memory` is scanned in `SPARSE_RAM_CHUNK`-sized pieces; adjacent pieces with
/// the same "all zero" status form a run. Non-zero runs are written at
/// `base_offset` plus the run's position inside `memory`; zero runs are only
/// counted.
///
/// Returns `(bytes_written, bytes_skipped)`.
///
/// # Errors
/// Returns the I/O error of a failed positioned write.
fn write_sparse_ram_slab(
    f: &std::fs::File,
    memory: &[u8],
    base_offset: u64,
) -> std::io::Result<(u64, u64)> {
    use std::os::unix::fs::FileExt;

    let len = memory.len();
    // End index of the chunk that begins at `from`, clamped to the slab end.
    let chunk_end = |from: usize| from + (len - from).min(SPARSE_RAM_CHUNK);

    let mut written = 0u64;
    let mut skipped = 0u64;
    let mut run_start = 0usize;

    while run_start < len {
        let mut run_end = chunk_end(run_start);
        let zero_run = is_zero_chunk(&memory[run_start..run_end]);

        // Grow the run while the following chunk has the same zero status.
        while run_end < len {
            let next_end = chunk_end(run_end);
            if is_zero_chunk(&memory[run_end..next_end]) != zero_run {
                break;
            }
            run_end = next_end;
        }

        let run_len = (run_end - run_start) as u64;
        if zero_run {
            skipped += run_len;
        } else {
            f.write_all_at(&memory[run_start..run_end], base_offset + run_start as u64)?;
            written += run_len;
        }
        run_start = run_end;
    }

    Ok((written, skipped))
}
/// Returns `true` when `chunk` contains no non-zero byte (an empty chunk counts
/// as all-zero).
fn is_zero_chunk(chunk: &[u8]) -> bool {
    !chunk.iter().any(|&byte| byte != 0)
}
/// Loads a complete snapshot, including the RAM section, from the file at `path`.
///
/// # Errors
/// Returns whatever `load_from_file_inner` reports for an unreadable or
/// malformed snapshot file.
pub fn load_from_file(path: &str) -> Result<Snapshot, FileError> {
    let (snapshot, _ram_offset, _memory_bytes) = load_from_file_inner(path, false)?;
    Ok(snapshot)
}
/// Loads everything except the RAM section from the snapshot file at `path`.
///
/// Returns the snapshot (with an empty `memory` buffer), the file offset of the
/// RAM section and the RAM size in bytes as recorded in the header.
///
/// # Errors
/// Returns whatever `load_from_file_inner` reports for an unreadable or
/// malformed snapshot file.
pub fn load_meta(path: &str) -> Result<(Snapshot, u64, usize), FileError> {
    const SKIP_RAM: bool = true;
    load_from_file_inner(path, SKIP_RAM)
}
/// Parses the snapshot file at `path`.
///
/// Layout as consumed by this reader:
/// * 72-byte header: magic (0..8), version (8..16), captured mach time (16..24),
///   captured CNTVCT (24..32), RAM size (32..40), GIC blob length (40..48),
///   vCPU count (48..52), bytes 52..56 not read, RAM guest-physical address
///   (56..64), RAM file offset (64..72). All integers are little-endian.
/// * GIC blob, then one record per vCPU (28-byte header followed by the GP,
///   SIMD, system, ICC and redistributor register lists).
/// * virtio-mmio device records (count-prefixed), each with its queue records.
/// * vsock listener records (count-prefixed).
/// * RAM bytes at the header-provided file offset.
///
/// Returns the snapshot, the RAM file offset and the RAM size. When `skip_ram`
/// is true the RAM section is not read and `Snapshot::memory` is empty.
///
/// # Errors
/// * `FileError::Truncated` when any section ends before its declared size.
/// * `FileError::BadMagic` / `FileError::BadVersion` for an unrecognised header.
/// * `FileError::Malformed` when the header declares zero vCPUs.
/// * The conversion of the I/O error when the file cannot be opened.
fn load_from_file_inner(path: &str, skip_ram: bool) -> Result<(Snapshot, u64, usize), FileError> {
    use std::io::{Seek, SeekFrom};

    // Counts and lengths come straight from the file. Preallocation is capped
    // so that a corrupt field ends in `Truncated` (the data runs out) instead
    // of an enormous allocation request.
    const MAX_PREALLOC_ENTRIES: usize = 1024;
    const MAX_PREALLOC_BYTES: usize = 1 << 20;

    /// Fills `buf` completely, reporting any failure as `Truncated`.
    fn fill<R: Read>(src: &mut R, buf: &mut [u8]) -> Result<(), FileError> {
        src.read_exact(buf).map_err(|_| FileError::Truncated)
    }

    /// Reads a little-endian `u32` element count.
    fn read_count<R: Read>(src: &mut R) -> Result<usize, FileError> {
        let mut raw = [0u8; 4];
        fill(src, &mut raw)?;
        Ok(u32::from_le_bytes(raw) as usize)
    }

    /// Reads `count` 12-byte `(u32 id, u64 value)` register entries.
    fn read_reg_pairs<R: Read>(src: &mut R, count: usize) -> Result<Vec<(u32, u64)>, FileError> {
        let mut regs = Vec::with_capacity(count.min(MAX_PREALLOC_ENTRIES));
        for _ in 0..count {
            let mut e = [0u8; 12];
            fill(src, &mut e)?;
            regs.push((le_u32(&e[0..4])?, le_u64(&e[4..12])?));
        }
        Ok(regs)
    }

    // The metadata consists of many tiny records; buffering avoids one read
    // syscall per register entry.
    let mut f = std::io::BufReader::new(std::fs::File::open(path)?);

    let mut hdr = [0u8; 72];
    fill(&mut f, &mut hdr)?;
    if hdr[0..8] != SNAPSHOT_MAGIC {
        return Err(FileError::BadMagic);
    }
    let version = le_u64(&hdr[8..16])?;
    if version != SNAPSHOT_VERSION {
        return Err(FileError::BadVersion(version));
    }
    let captured_mach_time = le_u64(&hdr[16..24])?;
    let captured_cntvct = le_u64(&hdr[24..32])?;
    let memory_bytes = le_u64(&hdr[32..40])? as usize;
    let gic_blob_len = le_u64(&hdr[40..48])? as usize;
    let n_vcpus = le_u32(&hdr[48..52])? as usize;
    if n_vcpus == 0 {
        return Err(FileError::Malformed("snapshot contains no vCPU state"));
    }
    let ram_gpa = le_u64(&hdr[56..64])?;
    let ram_offset = le_u64(&hdr[64..72])?;

    // Read the blob incrementally so its buffer only grows as far as the file
    // actually provides data.
    let mut gic_blob = Vec::with_capacity(gic_blob_len.min(MAX_PREALLOC_BYTES));
    f.by_ref()
        .take(gic_blob_len as u64)
        .read_to_end(&mut gic_blob)
        .map_err(|_| FileError::Truncated)?;
    if gic_blob.len() != gic_blob_len {
        return Err(FileError::Truncated);
    }

    let mut per_vcpu = Vec::with_capacity(n_vcpus.min(MAX_PREALLOC_ENTRIES));
    for _ in 0..n_vcpus {
        let mut hh = [0u8; 28];
        fill(&mut f, &mut hh)?;
        let vtimer_offset = le_u64(&hh[0..8])?;
        let gp_n = le_u32(&hh[8..12])? as usize;
        let simd_n = le_u32(&hh[12..16])? as usize;
        let sys_n = le_u32(&hh[16..20])? as usize;
        let icc_n = le_u32(&hh[20..24])? as usize;
        let redist_n = le_u32(&hh[24..28])? as usize;

        let gp_regs = read_reg_pairs(&mut f, gp_n)?;

        // SIMD entries carry a 128-bit value, so they are 20 bytes each.
        let mut simd_regs = Vec::with_capacity(simd_n.min(MAX_PREALLOC_ENTRIES));
        for _ in 0..simd_n {
            let mut e = [0u8; 20];
            fill(&mut f, &mut e)?;
            simd_regs.push((le_u32(&e[0..4])?, le_u128(&e[4..20])?));
        }

        let sys_regs = read_reg_pairs(&mut f, sys_n)?;
        let icc_regs = read_reg_pairs(&mut f, icc_n)?;
        let redist_regs = read_reg_pairs(&mut f, redist_n)?;

        per_vcpu.push(PerVcpuState {
            gp_regs,
            simd_regs,
            sys_regs,
            icc_regs,
            redist_regs,
            vtimer_offset,
        });
    }

    let n_mmio = read_count(&mut f)?;
    let mut mmio = Vec::with_capacity(n_mmio.min(MAX_PREALLOC_ENTRIES));
    for _ in 0..n_mmio {
        let mut h = [0u8; 20];
        fill(&mut f, &mut h)?;
        let driver_features = [le_u32(&h[0..4])?, le_u32(&h[4..8])?];
        let status = le_u32(&h[8..12])?;
        let interrupt_status = le_u32(&h[12..16])?;
        let n_q = le_u32(&h[16..20])? as usize;

        let mut queues = Vec::with_capacity(n_q.min(MAX_PREALLOC_ENTRIES));
        for _ in 0..n_q {
            // 32-byte queue record; byte 3 is not read.
            let mut q = [0u8; 32];
            fill(&mut f, &mut q)?;
            queues.push(QueueSnapshot {
                size: le_u16(&q[0..2])?,
                ready: q[2] != 0,
                desc_table: le_u64(&q[4..12])?,
                avail_ring: le_u64(&q[12..20])?,
                used_ring: le_u64(&q[20..28])?,
                last_avail_idx: le_u16(&q[28..30])?,
                next_used_idx: le_u16(&q[30..32])?,
            });
        }
        mmio.push(MmioSnapshot {
            driver_features,
            status,
            interrupt_status,
            queues,
        });
    }

    let n_lis = read_count(&mut f)?;
    let mut vsock_listeners = Vec::with_capacity(n_lis.min(MAX_PREALLOC_ENTRIES));
    for _ in 0..n_lis {
        let mut e = [0u8; 20];
        fill(&mut f, &mut e)?;
        vsock_listeners.push(TsiListenerSnapshot {
            cid: le_u64(&e[0..8])?,
            peer_port: le_u32(&e[8..12])?,
            vm_port: le_u32(&e[12..16])?,
            family: le_u16(&e[16..18])?,
            socktype: le_u16(&e[18..20])?,
        });
    }

    let memory = if skip_ram {
        Vec::new()
    } else {
        // Seeking discards the read-ahead buffer, and a read this large goes
        // straight to the file rather than through the buffer.
        f.seek(SeekFrom::Start(ram_offset))
            .map_err(|_| FileError::Truncated)?;
        let mut m = vec![0u8; memory_bytes];
        fill(&mut f, &mut m)?;
        m
    };

    Ok((
        Snapshot {
            captured_mach_time,
            captured_cntvct,
            ram_gpa,
            memory,
            gic_blob,
            per_vcpu,
            virtio: VirtioSnapshot {
                mmio,
                vsock_listeners,
            },
        },
        ram_offset,
        memory_bytes,
    ))
}
/// Maps the RAM section of the snapshot at `path` into memory.
///
/// The RAM offset and size are taken from the snapshot metadata (`load_meta`),
/// then the mapping itself is created by `mmap_ram_cow_at`.
///
/// Returns the mapping's base pointer and its length in bytes.
///
/// # Errors
/// A metadata failure is reported as an I/O error whose message starts with
/// `load_meta:`; mapping failures are returned unchanged.
pub fn mmap_ram_cow(path: &str) -> std::io::Result<(*mut u8, usize)> {
    match load_meta(path) {
        Ok((_, ram_offset, memory_bytes)) => mmap_ram_cow_at(path, ram_offset, memory_bytes),
        Err(e) => Err(std::io::Error::other(format!("load_meta: {e:?}"))),
    }
}
/// Creates a private, readable and writable mapping of `memory_bytes` bytes of
/// the file at `path`, starting at file offset `ram_offset`.
///
/// The mapping uses `MAP_PRIVATE`, so stores through the returned pointer are
/// not written back to the file. The file handle is closed when this function
/// returns; this function never unmaps the region.
///
/// Returns the mapping's base pointer and `memory_bytes`.
///
/// # Errors
/// Returns the OS error if the file cannot be opened or `mmap` fails.
pub fn mmap_ram_cow_at(
    path: &str,
    ram_offset: u64,
    memory_bytes: usize,
) -> std::io::Result<(*mut u8, usize)> {
    use std::os::fd::AsRawFd;

    let file = std::fs::File::open(path)?;
    // SAFETY: `mmap` is called with a null address hint and a descriptor that
    // stays open for the duration of the call; the result is checked against
    // `MAP_FAILED` before it is used.
    let mapping = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            memory_bytes,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            file.as_raw_fd(),
            ram_offset as libc::off_t,
        )
    };
    if mapping == libc::MAP_FAILED {
        Err(std::io::Error::last_os_error())
    } else {
        Ok((mapping.cast::<u8>(), memory_bytes))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a scratch path that is unique per test name, process and thread,
    /// so tests running in parallel do not share a file.
    fn temp_snapshot_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "snapshot-{name}-{}-{:?}.snap",
            std::process::id(),
            std::thread::current().id()
        ))
    }

    /// Writes `bytes` to a scratch file, runs `load_from_file` on it and removes
    /// the file again before handing back the loader's result.
    fn load_bytes(name: &str, bytes: &[u8]) -> std::io::Result<Result<Snapshot, FileError>> {
        let path = temp_snapshot_path(name);
        // Fail loudly instead of silently loading "" when the temp dir is not UTF-8.
        let path_str = path
            .to_str()
            .expect("temporary directory path must be valid UTF-8");
        std::fs::write(&path, bytes)?;
        let result = load_from_file(path_str);
        let _ = std::fs::remove_file(&path);
        Ok(result)
    }

    /// Returns a 72-byte header with valid magic and version, zero RAM and GIC
    /// sizes, the given vCPU count and a RAM offset directly after the header.
    fn minimal_header(n_vcpus: u32) -> [u8; 72] {
        let mut hdr = [0u8; 72];
        hdr[0..8].copy_from_slice(&SNAPSHOT_MAGIC);
        hdr[8..16].copy_from_slice(&SNAPSHOT_VERSION.to_le_bytes());
        hdr[32..40].copy_from_slice(&0u64.to_le_bytes());
        hdr[40..48].copy_from_slice(&0u64.to_le_bytes());
        hdr[48..52].copy_from_slice(&n_vcpus.to_le_bytes());
        hdr[64..72].copy_from_slice(&72u64.to_le_bytes());
        hdr
    }

    #[test]
    fn load_rejects_truncated_snapshot() -> std::io::Result<()> {
        let result = load_bytes("truncated", b"SMS")?;
        assert!(matches!(result, Err(FileError::Truncated)));
        Ok(())
    }

    #[test]
    fn load_rejects_bad_magic() -> std::io::Result<()> {
        let result = load_bytes("bad-magic", &[0u8; 72])?;
        assert!(matches!(result, Err(FileError::BadMagic)));
        Ok(())
    }

    #[test]
    fn load_rejects_missing_vcpu_state() -> std::io::Result<()> {
        let result = load_bytes("no-vcpu", &minimal_header(0))?;
        assert!(matches!(
            result,
            Err(FileError::Malformed("snapshot contains no vCPU state"))
        ));
        Ok(())
    }
}