use std::fmt;
use std::sync::Arc;
use crate::arch::aarch64::fdt;
use crate::arch::aarch64::fdt::VirtioMmioEntry;
use crate::arch::aarch64::layout;
use crate::devices::mmio_bus::MmioBus;
use crate::devices::serial::SerialPl011;
use crate::devices::virtio::balloon::{VirtioBalloon, VirtioBalloonWithRam};
use crate::devices::virtio::blk::VirtioBlk;
use crate::devices::virtio::mmio::MmioVirtio;
use crate::devices::virtio::queue::GuestMem;
use crate::devices::virtio::rng::VirtioRng;
use crate::devices::virtio::vsock::device::Vsock as VirtioVsock;
use crate::kernel::loader;
use crate::vmm::coord::VcpuCoordinator;
use crate::vmm::resources::VmResources;
use crate::vmm::snapshot;
use crate::vmm::vstate::{boot_linux, MicroVm};
/// Precomputed virtio-mmio address/interrupt layout for one VM.
///
/// `entries` is the full slot list handed to the FDT generator; the rng and
/// balloon slots are duplicated as named fields so device construction does
/// not have to re-derive their indices.
#[derive(Clone)]
pub struct VirtioMmioPlan {
    /// One `(base, irq)` pair per virtio-mmio slot, in FDT order:
    /// vsock first, then block devices, then rng, then balloon.
    pub entries: Vec<VirtioMmioEntry>,
    /// MMIO base address of the rng device slot.
    pub rng_base: u64,
    /// GIC SPI number raised by the rng device.
    pub rng_irq: u32,
    /// MMIO base address of the balloon device slot.
    pub balloon_base: u64,
    /// GIC SPI number raised by the balloon device.
    pub balloon_irq: u32,
}
/// Everything `build_device_set` produces: the populated MMIO bus plus the
/// handles needed later for snapshot restore and vsock transport control.
pub struct DeviceSet {
    /// Bus with the serial port and every virtio device registered.
    pub bus: MmioBus,
    /// Every virtio MMIO wrapper, in registration order (vsock, rng,
    /// balloon, then block devices); snapshot restore indexes into this.
    pub all_mmio: Vec<Arc<MmioVirtio>>,
    /// The vsock device, kept for muxer/transport control.
    pub vsock: Arc<VirtioVsock>,
}
/// A fully assembled micro-VM: guest state plus its device complement.
pub struct Vmm {
    /// The underlying VM (RAM mapping and vCPU state).
    pub vm: MicroVm,
    /// MMIO bus serving guest accesses.
    pub bus: Arc<MmioBus>,
    /// Virtio MMIO wrappers in registration order; snapshot restore maps
    /// saved device states onto these by index.
    pub all_mmio: Vec<Arc<MmioVirtio>>,
    /// vsock device handle (transport reset, listener restore).
    pub vsock: Arc<VirtioVsock>,
    /// Per-vCPU coordination state shared across threads.
    pub coord: Arc<VcpuCoordinator>,
}
/// Per-phase microsecond timings for a snapshot restore.
///
/// The first four fields are reported by the core restore in
/// `snapshot::restore_snapshot_timed_with_options`; the last two are
/// measured in `Vmm::restore_snapshot_timed_with_options` itself.
#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
pub struct VmmRestoreTimings {
    /// Guest RAM copy phase (from the core restore).
    pub ram_copy_us: u128,
    /// GIC state restore phase (from the core restore).
    pub gic_restore_us: u128,
    /// vCPU state restore phase (from the core restore).
    pub vcpu_restore_us: u128,
    /// Virtual-timer offset phase (from the core restore).
    pub vtimer_offset_us: u128,
    /// Virtio MMIO device-state restore phase.
    pub mmio_restore_us: u128,
    /// vsock TSI listener restore phase.
    pub listener_restore_us: u128,
}
/// Output of `build_vm`: the VM itself plus the virtio layout that was used
/// for FDT generation and is later reused for device construction.
pub struct BuiltVm {
    /// The created (and, for cold boots, kernel-loaded) VM.
    pub vm: MicroVm,
    /// MMIO/IRQ layout shared between the FDT and `build_device_set`.
    pub virtio_plan: VirtioMmioPlan,
}
/// Errors that can occur while building a VM or its device set.
#[derive(Debug)]
pub enum BuildError {
    /// Cold boot requested but no kernel path was supplied.
    MissingKernel,
    /// Failed to read the kernel image at `path`.
    KernelImage {
        path: String,
        source: std::io::Error,
    },
    /// Failed to read the initramfs at `path`.
    Initramfs {
        path: String,
        source: std::io::Error,
    },
    /// Flattened device tree generation failed.
    Fdt(std::io::Error),
    /// A hypervisor (HVF) operation failed.
    Hvf(crate::hvf::Error),
    /// Failed to open a block-device backing file at `path`.
    BlockDevice {
        path: String,
        source: std::io::Error,
    },
    /// The vsock muxer thread could not be started.
    VsockMuxer(crate::devices::virtio::vsock::muxer_thread::StartError),
}
impl fmt::Display for BuildError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
BuildError::MissingKernel => write!(f, "kernel path is required for cold boot"),
BuildError::KernelImage { path, source } => {
write!(f, "read kernel image {path}: {source}")
}
BuildError::Initramfs { path, source } => {
write!(f, "read initramfs {path}: {source}")
}
BuildError::Fdt(e) => write!(f, "generate FDT: {e}"),
BuildError::Hvf(e) => write!(f, "HVF operation failed: {e:?}"),
BuildError::BlockDevice { path, source } => {
write!(f, "open block device {path}: {source}")
}
BuildError::VsockMuxer(e) => write!(f, "{e}"),
}
}
}
// Marker impl only: no `source()` override, since each variant's cause is
// already rendered into its `Display` output.
impl std::error::Error for BuildError {}
impl From<crate::hvf::Error> for BuildError {
fn from(value: crate::hvf::Error) -> Self {
Self::Hvf(value)
}
}
impl From<crate::devices::virtio::vsock::muxer_thread::StartError> for BuildError {
fn from(value: crate::devices::virtio::vsock::muxer_thread::StartError) -> Self {
Self::VsockMuxer(value)
}
}
impl Vmm {
    /// Restore guest and device state from `snap`, discarding timing data.
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    pub fn restore_snapshot(&self, snap: &snapshot::Snapshot) -> crate::hvf::Result<()> {
        self.restore_snapshot_timed(snap)?;
        Ok(())
    }

    /// Restore from `snap` with default options, reporting per-phase timings.
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    pub fn restore_snapshot_timed(
        &self,
        snap: &snapshot::Snapshot,
    ) -> crate::hvf::Result<VmmRestoreTimings> {
        self.restore_snapshot_timed_with_options(snap, snapshot::SnapshotRestoreOptions::default())
    }

    /// Restore core VM state first, then virtio device state, then vsock TSI
    /// listeners, measuring the two device phases in microseconds.
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    pub fn restore_snapshot_timed_with_options(
        &self,
        snap: &snapshot::Snapshot,
        options: snapshot::SnapshotRestoreOptions,
    ) -> crate::hvf::Result<VmmRestoreTimings> {
        let core = snapshot::restore_snapshot_timed_with_options(&self.vm, snap, options)?;

        // Saved device states are matched to live devices positionally;
        // `zip` stops at the shorter of the two lists.
        let mmio_start = std::time::Instant::now();
        for (dev, saved) in self.all_mmio.iter().zip(snap.virtio.mmio.iter()) {
            dev.restore_state(saved);
        }
        let mmio_restore_us = mmio_start.elapsed().as_micros();

        let listener_start = std::time::Instant::now();
        self.vsock
            .muxer()
            .restore_tsi_listeners(&snap.virtio.vsock_listeners);
        let listener_restore_us = listener_start.elapsed().as_micros();

        Ok(VmmRestoreTimings {
            ram_copy_us: core.ram_copy_us,
            gic_restore_us: core.gic_restore_us,
            vcpu_restore_us: core.vcpu_restore_us,
            vtimer_offset_us: core.vtimer_offset_us,
            mmio_restore_us,
            listener_restore_us,
        })
    }

    /// Drop in-flight vsock transport state: reset the muxer, then clear any
    /// RX the device still has pending.
    pub fn reset_vsock_transport(&self) {
        let muxer = self.vsock.muxer();
        muxer.reset();
        self.vsock.reset_pending_rx();
    }
}
/// Build a complete `Vmm`: create the VM (`build_vm`), wire its devices
/// (`build_device_set`), and set up vCPU coordination.
///
/// `cow_ram` / `restore_memory_len` are forwarded to `build_vm` untouched.
///
/// # Errors
/// Propagates any `BuildError` from VM or device construction.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn build_vmm(
    resources: &VmResources,
    cow_ram: Option<(*mut u8, usize)>,
    restore_memory_len: Option<usize>,
) -> Result<Vmm, BuildError> {
    let BuiltVm { vm, virtio_plan } = build_vm(resources, cow_ram, restore_memory_len)?;
    let devices = build_device_set(
        &vm,
        &resources.block_devices,
        &resources.volumes,
        &virtio_plan,
    )?;

    let coord = VcpuCoordinator::new(resources.vcpus);
    // Turn on the first vCPU slot's `on` flag.
    coord.slots[0].on.store(true, std::sync::atomic::Ordering::SeqCst);

    Ok(Vmm {
        vm,
        bus: Arc::new(devices.bus),
        all_mmio: devices.all_mmio,
        vsock: devices.vsock,
        coord,
    })
}
/// Create the `MicroVm` and, for a cold boot, load kernel/initramfs and FDT.
///
/// On a restore (`resources.is_restore()`) no images are read and no boot
/// setup is done — only the RAM-sized VM is created. `cow_ram` takes
/// priority over `restore_memory_len`, which takes priority over the
/// configured memory size.
///
/// # Errors
/// - `BuildError::MissingKernel` for a cold boot without a kernel path.
/// - `BuildError::KernelImage` / `Initramfs` on image read failures.
/// - `BuildError::Fdt` on FDT generation failure.
/// - `BuildError::Hvf` on VM creation / boot failures.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn build_vm(
    resources: &VmResources,
    cow_ram: Option<(*mut u8, usize)>,
    restore_memory_len: Option<usize>,
) -> Result<BuiltVm, BuildError> {
    let mem_size = resources.memory_bytes();
    let restoring = resources.is_restore();

    // Kernel bytes are only needed for a cold boot.
    let kernel: Vec<u8> = if restoring {
        Vec::new()
    } else {
        let kernel_path = resources
            .kernel_path
            .as_deref()
            .ok_or(BuildError::MissingKernel)?;
        let image = loader::read_image(kernel_path).map_err(|source| BuildError::KernelImage {
            path: kernel_path.to_string(),
            source,
        })?;
        eprintln!("  kernel {} bytes loaded, magic OK", image.len());
        image
    };

    // Optional initramfs, likewise skipped on restore.
    let initrd: Option<Vec<u8>> = if restoring {
        None
    } else {
        resources
            .initrd_path
            .as_deref()
            .map(|path| {
                loader::read_initramfs(path).map_err(|source| BuildError::Initramfs {
                    path: path.to_string(),
                    source,
                })
            })
            .transpose()?
            .map(|data| {
                eprintln!("  initramfs {} bytes loaded", data.len());
                data
            })
    };

    let virtio_plan = virtio_mmio_plan(resources.block_devices.len() + resources.volumes.len());

    // Effective RAM size: an existing CoW mapping or a restore length
    // overrides the configured size.
    let actual_mem = cow_ram
        .map(|(_, len)| len)
        .or(restore_memory_len)
        .unwrap_or(mem_size);
    let vm = match cow_ram {
        Some((ptr, len)) => MicroVm::new_with_ram(ptr, len, true)?,
        None => MicroVm::new(actual_mem)?,
    };

    if !restoring {
        // Place the initrd after the kernel and describe it in the FDT.
        let initrd_info = initrd.as_ref().map(|data| {
            let gpa = crate::vmm::vstate::initrd_gpa(
                layout::DRAM_MEM_START_KERNEL,
                mem_size as u64,
                kernel.len() as u64,
                data.len() as u64,
            );
            (gpa, data.len() as u64)
        });
        let fdt = fdt::generate(
            resources.vcpus as usize,
            mem_size as u64,
            &resources.cmdline,
            initrd_info,
            &virtio_plan.entries,
        )
        .map_err(BuildError::Fdt)?;
        eprintln!("  FDT {} bytes generated", fdt.len());
        boot_linux(&vm, &kernel, initrd.as_deref(), &fdt)?;
    }

    Ok(BuiltVm { vm, virtio_plan })
}
pub fn virtio_mmio_plan(block_device_count: usize) -> VirtioMmioPlan {
let mut entries = vec![VirtioMmioEntry {
base: layout::VIRTIO_MMIO_BASE,
irq: layout::IRQ_BASE,
}];
let rng_idx = (1 + block_device_count) as u64;
let rng_base = layout::VIRTIO_MMIO_BASE + rng_idx * layout::VIRTIO_MMIO_STRIDE;
let rng_irq = layout::IRQ_BASE + rng_idx as u32 + 1;
let balloon_idx = rng_idx + 1;
let balloon_base = layout::VIRTIO_MMIO_BASE + balloon_idx * layout::VIRTIO_MMIO_STRIDE;
let balloon_irq = layout::IRQ_BASE + balloon_idx as u32 + 1;
for i in 0..block_device_count {
let n = (i as u64) + 1;
entries.push(VirtioMmioEntry {
base: layout::VIRTIO_MMIO_BASE + n * layout::VIRTIO_MMIO_STRIDE,
irq: layout::IRQ_BASE + n as u32 + 1,
});
}
entries.push(VirtioMmioEntry {
base: rng_base,
irq: rng_irq,
});
entries.push(VirtioMmioEntry {
base: balloon_base,
irq: balloon_irq,
});
VirtioMmioPlan {
entries,
rng_base,
rng_irq,
balloon_base,
balloon_irq,
}
}
/// Instantiate and register every MMIO device for `vm` on a fresh bus.
///
/// Registration order (which fixes the `all_mmio` indices that snapshot
/// restore relies on): vsock, rng, balloon, then one read-only virtio-blk
/// per `block_paths` entry, then one read-write virtio-blk per `volumes`
/// entry. The serial PL011 is registered on the bus but is not a virtio
/// device and is not part of `all_mmio`.
///
/// # Errors
/// - `BuildError::VsockMuxer` if the vsock muxer fails to start.
/// - `BuildError::BlockDevice` if a disk backing file cannot be opened.
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
pub fn build_device_set(
    vm: &MicroVm,
    block_paths: &[String],
    volumes: &[crate::vmm::resources::VolumeSpec],
    plan: &VirtioMmioPlan,
) -> Result<DeviceSet, BuildError> {
    let bus = MmioBus::new();
    bus.register(layout::SERIAL_MMIO_BASE, Arc::new(SerialPl011::new()));
    let mut all_mmio: Vec<Arc<MmioVirtio>> = Vec::new();
    // Guest-memory view shared by every virtio device on this VM.
    let mem = GuestMem::new(vm.ram_host, vm.ram_gpa, vm.ram_size);
    // vsock device with guest CID 3 (hard-coded, matches the log line below).
    let vsock = Arc::new(VirtioVsock::new(3 )?);
    // Raw SPI raiser for the vsock slot, fixed at IRQ_BASE (matches
    // `virtio_mmio_plan`'s entry 0).
    let raw_spi: Arc<dyn Fn() + Send + Sync> = Arc::new(|| {
        let _ = crate::hvf::gic_set_spi(layout::IRQ_BASE, true);
    });
    let vsock_mmio = Arc::new(MmioVirtio::new(vsock.clone(), mem.clone(), raw_spi));
    // Route used-buffer notifications through the MMIO wrapper's interrupt
    // bookkeeping instead of raising the SPI directly.
    let device_irq = vsock_mmio.make_used_buffer_irq();
    vsock.set_irq_raise(device_irq);
    // Let the muxer kick the vsock device when host-side data arrives.
    let vsock_for_kick = vsock.clone();
    let kick: Arc<dyn Fn() + Send + Sync> = Arc::new(move || {
        vsock_for_kick.kick();
    });
    vsock.muxer().set_kick(kick);
    bus.register(layout::VIRTIO_MMIO_BASE, vsock_mmio.clone());
    all_mmio.push(vsock_mmio);
    eprintln!("  vsock@{:x} CID=3", layout::VIRTIO_MMIO_BASE);
    // rng device at the base/irq the plan reserved for it.
    let rng = Arc::new(VirtioRng::new());
    let rng_irq = plan.rng_irq;
    let rng_raw_spi: Arc<dyn Fn() + Send + Sync> = Arc::new(move || {
        let _ = crate::hvf::gic_set_spi(rng_irq, true);
    });
    let rng_mmio = Arc::new(MmioVirtio::new(rng.clone(), mem.clone(), rng_raw_spi));
    rng.set_irq_raise(rng_mmio.make_used_buffer_irq());
    bus.register(plan.rng_base, rng_mmio.clone());
    all_mmio.push(rng_mmio);
    eprintln!("  rng@{:x}", plan.rng_base);
    // Balloon device; `VirtioBalloonWithRam` pairs the device with the raw
    // RAM mapping it operates on.
    let balloon = Arc::new(VirtioBalloon::new());
    let balloon_dev = Arc::new(VirtioBalloonWithRam {
        inner: balloon.clone(),
        ram_host: vm.ram_host,
        ram_size: vm.ram_size,
        ram_gpa: vm.ram_gpa,
    });
    let balloon_irq = plan.balloon_irq;
    let balloon_raw_spi: Arc<dyn Fn() + Send + Sync> = Arc::new(move || {
        let _ = crate::hvf::gic_set_spi(balloon_irq, true);
    });
    let balloon_mmio = Arc::new(MmioVirtio::new(balloon_dev, mem.clone(), balloon_raw_spi));
    balloon.set_irq_raise(balloon_mmio.make_used_buffer_irq());
    // Balloon additionally raises config-change interrupts (e.g. resize).
    balloon.set_config_irq_raise(balloon_mmio.make_config_change_irq());
    bus.register(plan.balloon_base, balloon_mmio.clone());
    all_mmio.push(balloon_mmio);
    eprintln!("  balloon@{:x}", plan.balloon_base);
    // Read-only block devices: slot n = i + 1, irq = IRQ_BASE + n + 1.
    // NOTE(review): IRQ_BASE + 1 is never used by this scheme (vsock takes
    // IRQ_BASE, the first blk takes IRQ_BASE + 2) — matches
    // `virtio_mmio_plan`; confirm the gap is intentional.
    for (i, path) in block_paths.iter().enumerate() {
        let n = (i as u64) + 1;
        let blk = Arc::new(
            VirtioBlk::open_ro(&format!("blk{i}"), path).map_err(|source| {
                BuildError::BlockDevice {
                    path: path.clone(),
                    source,
                }
            })?,
        );
        let blk_irq_intid = layout::IRQ_BASE + n as u32 + 1;
        let blk_raw_spi: Arc<dyn Fn() + Send + Sync> = Arc::new(move || {
            let _ = crate::hvf::gic_set_spi(blk_irq_intid, true);
        });
        let blk_mmio = Arc::new(MmioVirtio::new(blk.clone(), mem.clone(), blk_raw_spi));
        let blk_dev_irq = blk_mmio.make_used_buffer_irq();
        blk.set_irq_raise(blk_dev_irq);
        let blk_base = layout::VIRTIO_MMIO_BASE + n * layout::VIRTIO_MMIO_STRIDE;
        bus.register(blk_base, blk_mmio.clone());
        all_mmio.push(blk_mmio);
        eprintln!("  blk{i}@{blk_base:x}");
    }
    // Read-write volumes continue the same slot numbering after the
    // read-only disks.
    let ro_count = block_paths.len();
    for (j, vol) in volumes.iter().enumerate() {
        let i = ro_count + j;
        let n = (i as u64) + 1;
        let name = format!("vol{j}");
        let blk = Arc::new(
            VirtioBlk::open_rw(&name, &vol.host_path, vol.size_bytes).map_err(|source| {
                BuildError::BlockDevice {
                    path: vol.host_path.clone(),
                    source,
                }
            })?,
        );
        let blk_irq_intid = layout::IRQ_BASE + n as u32 + 1;
        let blk_raw_spi: Arc<dyn Fn() + Send + Sync> = Arc::new(move || {
            let _ = crate::hvf::gic_set_spi(blk_irq_intid, true);
        });
        let blk_mmio = Arc::new(MmioVirtio::new(blk.clone(), mem.clone(), blk_raw_spi));
        let blk_dev_irq = blk_mmio.make_used_buffer_irq();
        blk.set_irq_raise(blk_dev_irq);
        let blk_base = layout::VIRTIO_MMIO_BASE + n * layout::VIRTIO_MMIO_STRIDE;
        bus.register(blk_base, blk_mmio.clone());
        all_mmio.push(blk_mmio);
        eprintln!(
            "  {name}@{blk_base:x} (rw, mount {})",
            vol.guest_path
        );
    }
    Ok(DeviceSet {
        bus,
        all_mmio,
        vsock,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Slot order must stay: vsock, block devices, rng, balloon.
    #[test]
    fn virtio_plan_preserves_expected_order() {
        let plan = virtio_mmio_plan(2);
        assert_eq!(plan.entries.len(), 5);
        // Slots 0..=2 (vsock + two disks) are stride-spaced from the base.
        for (i, entry) in plan.entries.iter().take(3).enumerate() {
            assert_eq!(
                entry.base,
                layout::VIRTIO_MMIO_BASE + (i as u64) * layout::VIRTIO_MMIO_STRIDE
            );
        }
        assert_eq!(plan.entries[3].base, plan.rng_base);
        assert_eq!(plan.entries[4].base, plan.balloon_base);
    }
}