use std::fmt;
use std::sync::Arc;
use crate::devices::mmio_bus::MmioBus;
use crate::devices::virtio::mmio::MmioVirtio;
use crate::devices::virtio::vsock::device::Vsock;
use crate::hvf::Vcpu;
use crate::vmm::coord::VcpuCoordinator;
use crate::vmm::vstate::MicroVm;
/// Snapshot-trigger settings consumed by `dispatch_vcpu`.
///
/// None of the triggers fire unless `out_path` is `Some`.
pub struct DispatchSnapshot<'a> {
/// Wall-clock trigger: snapshot once the dispatch loop has been running for
/// at least this many milliseconds.
pub after_ms: Option<u64>,
/// Heartbeat trigger: snapshot once `HEARTBEAT_COUNT` reaches this value.
pub at_heartbeat: Option<u64>,
/// Listener trigger: snapshot as soon as the vsock muxer reports at least
/// one listener.
pub on_listener: bool,
/// How long (in milliseconds) `quiesce_to_wfi` runs the guest before a
/// heartbeat- or listener-triggered snapshot; `0` skips the quiesce step.
pub quiesce_ms: u64,
/// Path the snapshot is saved to.
pub out_path: Option<&'a str>,
/// Flag polled at the top of every dispatch iteration; once it reads `true`
/// the loop returns `DispatchExit::Stopped`.
pub stop_requested: Option<&'a std::sync::atomic::AtomicBool>,
}
/// Reason `dispatch_vcpu` returned without an error.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DispatchExit {
/// The vCPU run was reported as canceled.
Canceled,
/// The stop flag was set, or a vCPU step asked the loop to stop.
Stopped,
/// A snapshot was captured and written to the configured path.
SnapshotSaved,
/// The guest issued PSCI SYSTEM_OFF or SYSTEM_RESET.
SystemOff,
}
/// Errors produced by the vCPU worker and dispatch loops in this file.
#[derive(Debug)]
pub enum WorkerError {
/// No listener became ready within `after_ms` milliseconds.
/// NOTE(review): not constructed anywhere in this file — presumably raised
/// by a caller elsewhere; confirm.
ListenerReadinessTimeout {
after_ms: u64,
},
/// The helper thread that cancels the quiesce run could not be spawned.
QuiesceThreadSpawn(std::io::Error),
/// Capturing snapshot state failed (also used when a secondary vCPU fails
/// while pausing for a snapshot).
SnapshotCapture(crate::hvf::Error),
/// Writing the snapshot to `path` failed.
SnapshotSave {
path: String,
source: crate::vmm::snapshot::FileError,
},
/// Creating vCPU `idx` failed.
VcpuCreate {
idx: u32,
source: crate::hvf::Error,
},
/// Running vCPU `idx` failed.
VcpuRun {
idx: u32,
source: crate::hvf::Error,
},
/// Configuring vCPU `idx` failed (system-register setup or restoring its
/// state from a snapshot).
VcpuSetup {
idx: u32,
source: crate::hvf::Error,
},
/// Masking or unmasking the virtual timer failed.
VtimerMask(crate::hvf::Error),
}
impl fmt::Display for WorkerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
WorkerError::ListenerReadinessTimeout { after_ms } => write!(
f,
"listener readiness timeout after {after_ms} ms; refusing zero-listener snapshot"
),
WorkerError::QuiesceThreadSpawn(e) => write!(f, "spawn quiesce canceller: {e}"),
WorkerError::SnapshotCapture(e) => write!(f, "capture snapshot: {e:?}"),
WorkerError::SnapshotSave { path, source } => {
write!(f, "save snapshot {path}: {source:?}")
}
WorkerError::VcpuCreate { idx, source } => {
write!(f, "create vCPU {idx}: {source:?}")
}
WorkerError::VcpuRun { idx, source } => {
write!(f, "run vCPU {idx}: {source:?}")
}
WorkerError::VcpuSetup { idx, source } => {
write!(f, "setup vCPU {idx}: {source:?}")
}
WorkerError::VtimerMask(e) => write!(f, "set vtimer mask: {e:?}"),
}
}
}
// Marker impl only: `source()` is not overridden, so the trait's default
// (`None`) applies and the wrapped errors are reachable only through `Display`.
impl std::error::Error for WorkerError {}
/// Outcome of a single `vcpu_step` call.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum StepExit {
/// The hypervisor reported the run as canceled.
Canceled,
/// The exit was handled; the caller should run the vCPU again.
Continue,
/// The vCPU should stop running (PSCI CPU_OFF, an unhandled exception
/// class, or an unknown exit reason).
Stop,
/// The guest issued PSCI SYSTEM_OFF or SYSTEM_RESET.
SystemOff,
}
/// Thread entry point for a secondary vCPU worker.
///
/// Delegates to `run_secondary_inner` and reports any error on stderr
/// instead of returning it.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn run_secondary(
    idx: u32,
    coord: Arc<VcpuCoordinator>,
    bus: Arc<MmioBus>,
    restore_state: Option<crate::vmm::snapshot::PerVcpuState>,
) {
    run_secondary_inner(idx, coord, bus, restore_state)
        .unwrap_or_else(|e| eprintln!(" [vcpu-{idx}] worker error: {e}"));
}
/// Creates secondary vCPU `idx`, brings it to a runnable state and runs its
/// dispatch loop until the vCPU stops.
///
/// Two start-up paths exist:
/// * `restore_state` is `Some`: the saved state is restored, the coordinator
///   slot is marked on, and dispatch starts immediately.
/// * otherwise the worker parks in `coord.wait_for_run` until it is handed an
///   entry point and context id; if that returns `None` the worker exits
///   cleanly without running.
///
/// # Errors
///
/// Returns `WorkerError::VcpuCreate` when the vCPU cannot be created,
/// `WorkerError::VcpuSetup` when any register setup or state restore fails,
/// and whatever `dispatch_vcpu_inner` reports.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn run_secondary_inner(
    idx: u32,
    coord: Arc<VcpuCoordinator>,
    bus: Arc<MmioBus>,
    restore_state: Option<crate::vmm::snapshot::PerVcpuState>,
) -> Result<(), WorkerError> {
    use applevisor_sys as av;

    let vcpu = Vcpu::new().map_err(|source| WorkerError::VcpuCreate { idx, source })?;
    vcpu.set_sys_reg(av::hv_sys_reg_t::MPIDR_EL1, idx as u64)
        .map_err(|source| WorkerError::VcpuSetup { idx, source })?;
    coord.register_secondary(vcpu.handle());

    if let Some(st) = restore_state {
        eprintln!(" [vcpu-{idx}] restoring from snapshot");
        crate::vmm::snapshot::restore_vcpu_state(&vcpu, &st)
            .map_err(|source| WorkerError::VcpuSetup { idx, source })?;
        coord.slots[idx as usize]
            .on
            .store(true, std::sync::atomic::Ordering::SeqCst);
        return dispatch_vcpu_inner(idx, &vcpu, &bus, &coord);
    }

    eprintln!(" [vcpu-{idx}] parked, waiting for PSCI CPU_ON");
    let Some((entry, ctx_id)) = coord.wait_for_run(idx) else {
        return Ok(());
    };
    eprintln!(" [vcpu-{idx}] CPU_ON entry=0x{entry:x} ctx=0x{ctx_id:x}");

    // A failed write here would start the vCPU with an unintended PC or
    // PSTATE, so surface it as a setup error instead of silently dropping it.
    let set_entry_reg = |reg: av::hv_reg_t, value: u64| {
        vcpu.set_reg(reg, value)
            .map_err(|source| WorkerError::VcpuSetup { idx, source })
    };
    // 0x3c5: D/A/I/F all masked, mode EL1h.
    set_entry_reg(av::hv_reg_t::CPSR, 0x3c5)?;
    set_entry_reg(av::hv_reg_t::PC, entry)?;
    // CPU_ON hands the caller-supplied context id to the new CPU in X0.
    set_entry_reg(av::hv_reg_t::X0, ctx_id)?;
    set_entry_reg(av::hv_reg_t::X1, 0)?;
    set_entry_reg(av::hv_reg_t::X2, 0)?;
    set_entry_reg(av::hv_reg_t::X3, 0)?;

    dispatch_vcpu_inner(idx, &vcpu, &bus, &coord)
}
/// Runs the dispatch loop for a vCPU and fires the configured snapshot
/// triggers.
///
/// Every iteration performs, in this order:
/// 1. stop-flag check (returns `DispatchExit::Stopped`);
/// 2. heartbeat trigger (optionally quiescing first);
/// 3. listener trigger (optionally quiescing first);
/// 4. wall-clock trigger;
/// 5. one `vcpu_step`.
///
/// Triggers 2–4 are inert unless `snapshot.out_path` is set. The first
/// snapshot that is saved ends the loop with `DispatchExit::SnapshotSaved`.
///
/// # Errors
///
/// Propagates failures from quiescing, snapshot capture/save and `vcpu_step`.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn dispatch_vcpu(
    idx: u32,
    vcpu: &Vcpu,
    bus: &Arc<MmioBus>,
    coord: &Arc<VcpuCoordinator>,
    all_mmio: &[Arc<MmioVirtio>],
    vsock: &Arc<Vsock>,
    vm: &MicroVm,
    snapshot: DispatchSnapshot<'_>,
) -> Result<DispatchExit, WorkerError> {
    use crate::devices::serial::HEARTBEAT_COUNT;
    use crate::vmm::snapshot;
    use std::sync::atomic::Ordering;
    use std::time::Instant;

    let loop_started = Instant::now();
    // The configuration never changes while the loop runs, so decide once
    // whether any trigger can fire at all.
    let snapshot_armed = snapshot.out_path.is_some();

    // Pauses secondaries, captures VM + virtio state, releases secondaries and
    // writes the result. Returns `Ok(false)` when no output path is configured.
    let take_snapshot = |reason: &str| -> Result<bool, WorkerError> {
        let out = match snapshot.out_path {
            Some(path) => path,
            None => return Ok(false),
        };
        eprintln!(" snapshot trigger ({reason})");

        let capture_started = Instant::now();
        let secondary_handles = coord.secondary_handles_snapshot();
        let has_secondaries = !secondary_handles.is_empty();
        if has_secondaries {
            coord.request_snapshot_pause(&secondary_handles);
        }
        let virtio = snapshot::VirtioSnapshot {
            mmio: all_mmio.iter().map(|m| m.capture_state()).collect(),
            vsock_listeners: vsock.muxer().capture_tsi_listeners(),
        };
        let mut snap =
            snapshot::capture_snapshot(vm, virtio).map_err(WorkerError::SnapshotCapture)?;
        if has_secondaries {
            snap.per_vcpu.extend(coord.take_secondary_states());
            coord.release_after_snapshot();
        }
        let cap_us = capture_started.elapsed().as_micros();

        let save_started = Instant::now();
        let write_stats = snapshot::save_to_file_with_stats(out, &snap).map_err(|source| {
            WorkerError::SnapshotSave {
                path: out.to_string(),
                source,
            }
        })?;
        let save_us = save_started.elapsed().as_micros();

        let ram_mib = write_stats.ram_bytes / (1024 * 1024);
        let ram_data_mib = write_stats.ram_data_bytes / (1024 * 1024);
        let ram_zero_mib = write_stats.ram_zero_bytes / (1024 * 1024);
        eprintln!(
            "\n snapshot ({reason}): capture {cap_us} us, save {save_us} us, RAM {} MiB (data {} MiB, zero {} MiB), GIC {} B, mmio={} listeners={} -> {out}",
            ram_mib,
            ram_data_mib,
            ram_zero_mib,
            snap.gic_blob.len(),
            snap.virtio.mmio.len(),
            snap.virtio.vsock_listeners.len()
        );
        Ok(true)
    };

    let mut heartbeat_quiesced = false;
    let mut listener_quiesced = false;

    loop {
        // 1. Cooperative stop.
        if let Some(flag) = snapshot.stop_requested {
            if flag.load(Ordering::SeqCst) {
                return Ok(DispatchExit::Stopped);
            }
        }

        // 2. Heartbeat trigger.
        let heartbeat_due = snapshot_armed
            && snapshot
                .at_heartbeat
                .is_some_and(|target| HEARTBEAT_COUNT.load(Ordering::SeqCst) >= target);
        if heartbeat_due {
            if !heartbeat_quiesced && snapshot.quiesce_ms > 0 {
                eprintln!(
                    " quiescing for {} ms before snapshot...",
                    snapshot.quiesce_ms
                );
                if let Some(exit) = quiesce_to_wfi(vcpu, bus, coord, snapshot.quiesce_ms)? {
                    return Ok(exit);
                }
                heartbeat_quiesced = true;
            }
            if take_snapshot("heartbeat")? {
                return Ok(DispatchExit::SnapshotSaved);
            }
        }

        // 3. Listener trigger.
        if snapshot.on_listener && snapshot_armed {
            let listeners = vsock.muxer().listener_count();
            if listeners > 0 {
                if !listener_quiesced {
                    eprintln!(" listener readiness: {listeners} TSI listener(s)");
                    if snapshot.quiesce_ms > 0 {
                        eprintln!(
                            " quiescing listener-ready guest for {} ms before snapshot...",
                            snapshot.quiesce_ms
                        );
                        if let Some(exit) =
                            quiesce_to_wfi(vcpu, bus, coord, snapshot.quiesce_ms)?
                        {
                            return Ok(exit);
                        }
                    }
                    listener_quiesced = true;
                }
                if take_snapshot("listener-ready")? {
                    return Ok(DispatchExit::SnapshotSaved);
                }
            }
        }

        // 4. Wall-clock trigger (no quiesce step on this path).
        if let Some(after_ms) = snapshot.after_ms.filter(|_| snapshot_armed) {
            let elapsed_ms = loop_started.elapsed().as_millis() as u64;
            if elapsed_ms >= after_ms {
                if snapshot.on_listener && vsock.muxer().listener_count() == 0 {
                    eprintln!(
                        " no listener after {after_ms} ms; falling back to init-state snapshot \
                         (typical for non-service images like rust:1-slim)"
                    );
                }
                if take_snapshot("wall-clock")? {
                    return Ok(DispatchExit::SnapshotSaved);
                }
            }
        }

        // 5. Run the guest until its next exit.
        let exit = match vcpu_step(idx, vcpu, bus, coord)? {
            StepExit::Continue => continue,
            StepExit::Canceled => DispatchExit::Canceled,
            StepExit::Stop => DispatchExit::Stopped,
            StepExit::SystemOff => DispatchExit::SystemOff,
        };
        return Ok(exit);
    }
}
/// Runs the guest for roughly `wait_ms` milliseconds with the virtual timer
/// masked, then forces the vCPU out of its run loop.
///
/// A helper thread sleeps for `wait_ms` and then calls `hv_vcpus_exit` on this
/// vCPU; the resulting canceled exit ends the quiesce phase.
/// NOTE(review): the name suggests the guest is expected to be idle in WFI by
/// then — nothing in this function verifies that.
///
/// Returns `Ok(None)` when the quiesce phase ended through the canceled exit
/// and the caller may continue, or `Ok(Some(exit))` when the guest terminated
/// meanwhile and the caller should return `exit` instead of snapshotting.
///
/// # Errors
///
/// Returns `WorkerError::VtimerMask` if masking/unmasking the timer fails,
/// `WorkerError::QuiesceThreadSpawn` if the helper thread cannot be started,
/// and any error from `vcpu_step`. Once the timer has been masked it is
/// unmasked again on every exit path, and a spawned helper is always joined.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn quiesce_to_wfi(
    vcpu: &Vcpu,
    bus: &MmioBus,
    coord: &VcpuCoordinator,
    wait_ms: u64,
) -> Result<Option<DispatchExit>, WorkerError> {
    use applevisor_sys as av;

    vcpu.set_vtimer_mask(true)
        .map_err(WorkerError::VtimerMask)?;

    let h = vcpu.handle();
    let spawned = std::thread::Builder::new()
        .name("quiesce-canceller".into())
        .spawn(move || {
            std::thread::sleep(std::time::Duration::from_millis(wait_ms));
            // SAFETY: `&h` points at exactly one handle owned by this closure,
            // matching the count of 1. The thread is joined before this
            // function returns, while `vcpu` is still borrowed.
            unsafe {
                let _ = av::hv_vcpus_exit(&h, 1);
            }
        });
    let canceller = match spawned {
        Ok(handle) => handle,
        Err(e) => {
            // Do not leave the timer masked when we bail out early; the spawn
            // failure is the error worth reporting.
            let _ = vcpu.set_vtimer_mask(false);
            return Err(WorkerError::QuiesceThreadSpawn(e));
        }
    };

    // Errors must `break` rather than `?`-return so that the join and unmask
    // below always run.
    let result = loop {
        match vcpu_step(0, vcpu, bus, coord) {
            Ok(StepExit::Continue) => {}
            Ok(StepExit::Canceled) => break Ok(None),
            // A stop is terminal in the main dispatch loop as well; do not
            // snapshot a vCPU that just stopped.
            Ok(StepExit::Stop) => break Ok(Some(DispatchExit::Stopped)),
            Ok(StepExit::SystemOff) => break Ok(Some(DispatchExit::SystemOff)),
            Err(e) => break Err(e),
        }
    };

    let _ = canceller.join();
    let unmask = vcpu.set_vtimer_mask(false).map_err(WorkerError::VtimerMask);

    // A run error takes precedence over an unmask error.
    match (result, unmask) {
        (Err(e), _) => Err(e),
        (Ok(_), Err(e)) => Err(e),
        (Ok(exit), Ok(())) => Ok(exit),
    }
}
/// Dispatch loop for a secondary vCPU.
///
/// Before each step the vCPU gives the coordinator a chance to pause it for a
/// snapshot. A canceled run keeps the loop alive only while a snapshot request
/// is pending; any other terminal step result ends the loop.
///
/// # Errors
///
/// Returns `WorkerError::SnapshotCapture` if pausing for a snapshot fails and
/// propagates any error from `vcpu_step`.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn dispatch_vcpu_inner(
    idx: u32,
    vcpu: &Vcpu,
    bus: &Arc<MmioBus>,
    coord: &Arc<VcpuCoordinator>,
) -> Result<(), WorkerError> {
    use std::sync::atomic::Ordering;

    loop {
        coord
            .maybe_pause_for_snapshot(idx, vcpu)
            .map_err(WorkerError::SnapshotCapture)?;

        let keep_running = match vcpu_step(idx, vcpu, bus, coord)? {
            StepExit::Continue => true,
            // Canceled because of a snapshot request: loop back so the pause
            // hook above can service it.
            StepExit::Canceled => coord.snapshot_request.load(Ordering::Acquire),
            StepExit::Stop | StepExit::SystemOff => false,
        };
        if !keep_running {
            return Ok(());
        }
    }
}
/// Runs the vCPU until its next exit and handles that exit.
///
/// Exception exits are dispatched on the exception class (ESR bits [31:26]):
/// data aborts go to the MMIO bus, `0x16` is served as a PSCI call, `0x18` and
/// `0x17` are emulated by skipping the instruction, and anything else stops
/// the vCPU.
///
/// Returns the `StepExit` telling the caller whether to keep running.
///
/// # Errors
///
/// Returns `WorkerError::VcpuRun` when the hypervisor run call fails.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn vcpu_step(
    idx: u32,
    vcpu: &Vcpu,
    bus: &MmioBus,
    coord: &VcpuCoordinator,
) -> Result<StepExit, WorkerError> {
    use crate::hvf::ExitReason;
    use crate::vmm::coord::*;
    use crate::vmm::exit_profile::{self, Stage};
    use applevisor_sys as av;

    let run_started = std::time::Instant::now();
    let exit = vcpu
        .run()
        .map_err(|source| WorkerError::VcpuRun { idx, source })?;
    exit_profile::record(Stage::VcpuRun, run_started.elapsed().as_micros() as u64);

    let reason = ExitReason::from(exit.reason as u32);
    let esr = exit.exception.syndrome;
    let gpa = exit.exception.physical_address;
    let ec = (esr >> 26) & 0x3f;

    match reason {
        ExitReason::Exception => match ec {
            // Data abort: forwarded to the MMIO bus.
            0x24 => {
                exit_profile::record(Stage::DataAbort, 0);
                handle_data_abort_vcpu(vcpu, bus, esr, gpa);
            }
            // Hypervisor call carrying a PSCI function id in X0.
            0x16 => {
                exit_profile::record(Stage::Hvc, 0);
                let function_id = vcpu.get_reg(av::hv_reg_t::X0).unwrap_or(0) as u32;
                let ret: i64 = match function_id {
                    PSCI_VERSION => 0x10000,
                    PSCI_FEATURES => {
                        let queried = vcpu.get_reg(av::hv_reg_t::X1).unwrap_or(0) as u32;
                        match queried {
                            PSCI_VERSION | PSCI_CPU_ON | PSCI_CPU_OFF | PSCI_AFFINITY_INFO
                            | PSCI_FEATURES | PSCI_SYSTEM_OFF | PSCI_SYSTEM_RESET => 0,
                            _ => PSCI_NOT_SUPPORTED,
                        }
                    }
                    PSCI_CPU_ON => {
                        let target = vcpu.get_reg(av::hv_reg_t::X1).unwrap_or(0) as u32;
                        let entry = vcpu.get_reg(av::hv_reg_t::X2).unwrap_or(0);
                        let ctx_id = vcpu.get_reg(av::hv_reg_t::X3).unwrap_or(0);
                        eprintln!(" [vcpu-{idx}] PSCI CPU_ON target={target} entry=0x{entry:x}");
                        coord.cpu_on(target, entry, ctx_id)
                    }
                    PSCI_CPU_OFF => {
                        eprintln!(" [vcpu-{idx}] PSCI CPU_OFF");
                        return Ok(StepExit::Stop);
                    }
                    PSCI_AFFINITY_INFO => {
                        let target = vcpu.get_reg(av::hv_reg_t::X1).unwrap_or(0) as u32;
                        coord.affinity_info(target)
                    }
                    PSCI_SYSTEM_OFF | PSCI_SYSTEM_RESET => {
                        eprintln!(" [vcpu-{idx}] PSCI SYSTEM_OFF/RESET - exiting");
                        return Ok(StepExit::SystemOff);
                    }
                    _ => PSCI_NOT_SUPPORTED,
                };
                // The call result goes back to the guest in X0.
                vcpu.set_reg(av::hv_reg_t::X0, ret as u64).ok();
            }
            // Trapped instruction with a direction bit (ISS bit 0) and a
            // transfer register (ISS bits [9:5]): reads yield 0, writes are
            // dropped, and the instruction is skipped.
            // NOTE(review): profiled as `Stage::Svc`, but the decoding looks
            // like a system-register trap — confirm the stage name.
            0x18 => {
                exit_profile::record(Stage::Svc, 0);
                let pc = vcpu.get_reg(av::hv_reg_t::PC).unwrap_or(0);
                let iss = esr & 0x01ff_ffff;
                let rt = ((iss >> 5) & 0x1f) as u32;
                let is_read = (iss & 1) != 0;
                if is_read && rt < 31 {
                    vcpu.set_x(rt, 0).ok();
                }
                vcpu.set_reg(av::hv_reg_t::PC, pc + 4).ok();
            }
            // Report -1 in X0 and step over the instruction.
            0x17 => {
                let pc = vcpu.get_reg(av::hv_reg_t::PC).unwrap_or(0);
                vcpu.set_x(0, (-1i64) as u64).ok();
                vcpu.set_reg(av::hv_reg_t::PC, pc + 4).ok();
            }
            _ => {
                let pc = vcpu.get_reg(av::hv_reg_t::PC).unwrap_or(0);
                eprintln!(" [vcpu-{idx}] unhandled EC={ec:#x} ESR=0x{esr:x} PC=0x{pc:x}");
                return Ok(StepExit::Stop);
            }
        },
        ExitReason::VTimerActivated => {
            exit_profile::record(Stage::Vtimer, 0);
        }
        ExitReason::Canceled => {
            eprintln!(" [vcpu-{idx}] canceled");
            return Ok(StepExit::Canceled);
        }
        ExitReason::Unknown(v) => {
            eprintln!(" [vcpu-{idx}] unknown exit {v}");
            return Ok(StepExit::Stop);
        }
    }

    Ok(StepExit::Continue)
}
/// Emulates a guest data abort as an MMIO access on `bus`, then steps the
/// guest past the faulting instruction.
///
/// The access is decoded from the ISS field of `esr`:
/// * bit 24 — syndrome valid; when clear the instruction is skipped untouched;
/// * bits [23:22] — access size as a power of two (1, 2, 4 or 8 bytes);
/// * bits [20:16] — transfer register number;
/// * bit 6 — set for a write, clear for a read.
///
/// `gpa` is the guest physical address of the access. Unhandled reads return
/// 0 to the guest; unhandled accesses are logged only when the
/// `SUPERMACHINE_TRACE` environment variable is set.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
fn handle_data_abort_vcpu(vcpu: &Vcpu, bus: &MmioBus, esr: u64, gpa: u64) {
    use crate::vmm::exit_profile;
    use applevisor_sys as av;

    let pc = vcpu.get_reg(av::hv_reg_t::PC).unwrap_or(0);
    let far = gpa;
    let iss = esr & 0x01ff_ffff;
    let isv = ((iss >> 24) & 1) != 0;
    if !isv {
        // Without a valid syndrome the access cannot be decoded; skip it.
        vcpu.set_reg(av::hv_reg_t::PC, pc + 4).ok();
        return;
    }

    let sas = ((iss >> 22) & 0x3) as u8;
    let size: u8 = 1 << sas;
    let srt = ((iss >> 16) & 0x1f) as u32;
    let wnr = ((iss >> 6) & 1) != 0;

    if wnr {
        // Register 31 is the zero register in this encoding: a store from it
        // writes 0. Never ask the vCPU for "X31", mirroring the `srt < 31`
        // guard on the load path.
        let val = if srt < 31 {
            vcpu.get_x(srt).unwrap_or(0)
        } else {
            0
        };
        let t0 = std::time::Instant::now();
        let handled = bus.write(far, val, size);
        exit_profile::record(
            exit_profile::mmio_stage(far, true),
            t0.elapsed().as_micros() as u64,
        );
        if !handled && std::env::var("SUPERMACHINE_TRACE").is_ok() {
            eprintln!("MMIO W {far:#x} = {val:#x} sz={size} (unhandled)");
        }
    } else {
        let t0 = std::time::Instant::now();
        let val = bus.read(far, size);
        exit_profile::record(
            exit_profile::mmio_stage(far, false),
            t0.elapsed().as_micros() as u64,
        );
        if val.is_none() && std::env::var("SUPERMACHINE_TRACE").is_ok() {
            eprintln!("MMIO R {far:#x} sz={size} (unhandled, returning 0)");
        }
        let val = val.unwrap_or(0);
        // A load into the zero register discards the value.
        if srt < 31 {
            vcpu.set_x(srt, val).ok();
        }
    }

    vcpu.set_reg(av::hv_reg_t::PC, pc + 4).ok();
}