// supermachine 0.4.13
//
// Run any OCI/Docker image as a hardware-isolated microVM on macOS HVF (Linux KVM and Windows WHP support in progress). Single library API, zero flags for the common case, sub-100 ms cold restore from snapshot.
// Thin Result-returning wrappers over applevisor-sys: VM create,
// vCPU create/run, mem map, exit decode. The HvfVm / HvfVcpu API
// surface this exposes is what vmm::vstate sits on top of.

#![cfg(all(target_os = "macos", target_arch = "aarch64"))]

use applevisor_sys as av;

/// Errors produced by the HVF wrappers in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// `hv_*` returned a non-zero status. We surface the raw value
    /// so callers can decode it via Apple's `HV_ERROR_*` constants.
    Hv(i32),
}

impl std::fmt::Display for Error {
    /// Render the raw status as an 8-digit hexadecimal number, which is
    /// the notation Apple's headers use for the `HV_ERROR_*` constants.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The `as u32` cast only reinterprets the bits so negative
            // statuses print as their unsigned hex pattern.
            Error::Hv(code) => write!(f, "HVF call failed with status {:#010x}", *code as u32),
        }
    }
}

// Implementing the standard error trait lets callers box this error or
// propagate it with `?` into `Box<dyn std::error::Error>`.
impl std::error::Error for Error {}

/// Result alias used by every fallible wrapper in this module.
pub type Result<T> = std::result::Result<T, Error>;

#[inline]
fn check(rc: av::hv_return_t) -> Result<()> {
    if rc == 0 {
        Ok(())
    } else {
        Err(Error::Hv(rc as i32))
    }
}

/// Owned VM handle. `hv_vm_create` is a process-wide singleton on
/// HVF, so creating a second `Vm` while one exists will fail. We
/// don't enforce singleton in code; caller responsibility.
///
/// The type carries no data: it is a marker whose `Drop` impl calls
/// `hv_vm_destroy`, so the VM lives exactly as long as this value.
///
/// NOTE(review): as a public unit struct, a `Vm` value can be written
/// without going through `Vm::new`, and dropping such a value would
/// still call `hv_vm_destroy` — confirm no caller does this.
pub struct Vm;

impl Vm {
    /// Create the per-process HVF VM and the in-kernel GIC v3.
    /// Distributor + redistributor base addresses come from
    /// `arch::aarch64::layout` and MUST match the FDT we generate.
    pub fn new() -> Result<Self> {
        use crate::arch::aarch64::layout;
        // SAFETY: HVF API; no aliasing.
        unsafe {
            check(av::hv_vm_create(std::ptr::null_mut()))?;
            let gic_cfg = av::hv_gic_config_create();
            check(av::hv_gic_config_set_distributor_base(
                gic_cfg,
                layout::GICV3_DIST_BASE,
            ))?;
            check(av::hv_gic_config_set_redistributor_base(
                gic_cfg,
                layout::GICV3_REDIST_BASE,
            ))?;
            check(av::hv_gic_create(gic_cfg))?;
        }
        Ok(Vm)
    }

    /// Map a host buffer as guest physical memory. `gpa` is the
    /// guest physical address; `host_ptr` points to host memory of
    /// `len` bytes; `flags` is read/write/exec mask.
    ///
    /// SAFETY: caller must keep `host_ptr` valid + readable/writable
    /// per `flags` for the lifetime of the VM (or until `unmap`).
    pub unsafe fn map(&self, host_ptr: *mut u8, gpa: u64, len: usize, flags: u64) -> Result<()> {
        check(unsafe { av::hv_vm_map(host_ptr as _, gpa, len, flags as _) })
    }
}

impl Drop for Vm {
    /// Tear down the process's HVF VM. The status is discarded because
    /// `drop` has no way to report a failure.
    fn drop(&mut self) {
        // SAFETY: HVF teardown; the call takes no arguments.
        let _status = unsafe { av::hv_vm_destroy() };
    }
}

/// vCPU handle — bound to the thread that creates it.
///
/// Dropping the value calls `hv_vcpu_destroy` on the stored handle.
pub struct Vcpu {
    /// Identifier written by `hv_vcpu_create`; passed to every
    /// per-vCPU `hv_*` call made through this type.
    handle: av::hv_vcpu_t,
    /// Exit-information pointer written by `hv_vcpu_create` and
    /// dereferenced after each `hv_vcpu_run`. Being a raw pointer, it
    /// also keeps this type from being automatically `Send`/`Sync`.
    exit: *const av::hv_vcpu_exit_t,
}

impl Vcpu {
    /// Create a vCPU via `hv_vcpu_create` (null config) and keep the
    /// handle and exit-info pointer it hands back.
    pub fn new() -> Result<Self> {
        let mut id: av::hv_vcpu_t = 0;
        let mut exit_info: *const av::hv_vcpu_exit_t = std::ptr::null();
        // SAFETY: both out-pointers refer to live locals.
        let rc = unsafe { av::hv_vcpu_create(&mut id, &mut exit_info, std::ptr::null_mut()) };
        check(rc)?;
        Ok(Self {
            handle: id,
            exit: exit_info,
        })
    }

    /// Raw HVF vCPU identifier.
    pub fn handle(&self) -> av::hv_vcpu_t {
        self.handle
    }

    // ---- general-purpose registers -------------------------------

    /// Read a general-purpose register.
    pub fn get_reg(&self, reg: av::hv_reg_t) -> Result<u64> {
        let mut out = 0u64;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_vcpu_get_reg(self.handle, reg, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Write a general-purpose register.
    pub fn set_reg(&self, reg: av::hv_reg_t, value: u64) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_vcpu_set_reg(self.handle, reg, value) };
        check(rc)
    }

    /// Map a numeric index to the matching `X0..X30` register id, or
    /// `None` when the index is above 30.
    fn x_reg(n: u32) -> Option<av::hv_reg_t> {
        if n > 30 {
            return None;
        }
        // SAFETY: relies on X0..X30 having consecutive discriminants
        // starting at `X0`; `n <= 30` keeps the value in that range.
        Some(unsafe { std::mem::transmute::<u32, av::hv_reg_t>(av::hv_reg_t::X0 as u32 + n) })
    }

    /// Convenience: read register X`n` by numeric index. Indices above
    /// 30 are not looked up and read as zero.
    pub fn get_x(&self, n: u32) -> Result<u64> {
        match Self::x_reg(n) {
            Some(reg) => self.get_reg(reg),
            None => Ok(0),
        }
    }

    /// Convenience: write register X`n` by numeric index. Writes to
    /// indices above 30 are silently discarded.
    pub fn set_x(&self, n: u32, v: u64) -> Result<()> {
        match Self::x_reg(n) {
            Some(reg) => self.set_reg(reg, v),
            None => Ok(()),
        }
    }

    // ---- system registers ----------------------------------------

    /// Read a system register.
    pub fn get_sys_reg(&self, reg: av::hv_sys_reg_t) -> Result<u64> {
        let mut out = 0u64;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_vcpu_get_sys_reg(self.handle, reg, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Write a system register.
    pub fn set_sys_reg(&self, reg: av::hv_sys_reg_t, value: u64) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_vcpu_set_sys_reg(self.handle, reg, value) };
        check(rc)
    }

    // ---- SIMD / FP registers -------------------------------------

    /// Read a 128-bit SIMD/FP register.
    pub fn get_simd_fp_reg(&self, reg: av::hv_simd_fp_reg_t) -> Result<u128> {
        let mut out = 0u128;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_vcpu_get_simd_fp_reg(self.handle, reg, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Write a 128-bit SIMD/FP register.
    pub fn set_simd_fp_reg(&self, reg: av::hv_simd_fp_reg_t, value: u128) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_vcpu_set_simd_fp_reg(self.handle, reg, value) };
        check(rc)
    }

    // ---- virtual timer -------------------------------------------

    /// Read the vCPU's virtual-timer offset.
    pub fn get_vtimer_offset(&self) -> Result<u64> {
        let mut out = 0u64;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_vcpu_get_vtimer_offset(self.handle, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Set the vCPU's virtual-timer offset.
    pub fn set_vtimer_offset(&self, value: u64) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_vcpu_set_vtimer_offset(self.handle, value) };
        check(rc)
    }

    /// Mask (`true`) or unmask (`false`) the virtual timer.
    pub fn set_vtimer_mask(&self, masked: bool) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_vcpu_set_vtimer_mask(self.handle, masked) };
        check(rc)
    }

    // ---- in-kernel GIC, per-vCPU registers -----------------------

    /// Read a GIC ICC register for this vCPU.
    pub fn get_icc_reg(&self, reg: av::hv_gic_icc_reg_t) -> Result<u64> {
        let mut out = 0u64;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_gic_get_icc_reg(self.handle, reg, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Write a GIC ICC register for this vCPU.
    pub fn set_icc_reg(&self, reg: av::hv_gic_icc_reg_t, value: u64) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_gic_set_icc_reg(self.handle, reg, value) };
        check(rc)
    }

    /// Read a GIC redistributor register for this vCPU.
    pub fn get_redist_reg(&self, reg: av::hv_gic_redistributor_reg_t) -> Result<u64> {
        let mut out = 0u64;
        // SAFETY: HVF API; out-pointer is a live local.
        let rc = unsafe { av::hv_gic_get_redistributor_reg(self.handle, reg, &mut out) };
        check(rc)?;
        Ok(out)
    }

    /// Write a GIC redistributor register for this vCPU.
    pub fn set_redist_reg(&self, reg: av::hv_gic_redistributor_reg_t, value: u64) -> Result<()> {
        // SAFETY: HVF API; arguments are plain values.
        let rc = unsafe { av::hv_gic_set_redistributor_reg(self.handle, reg, value) };
        check(rc)
    }

    // ---- execution -----------------------------------------------

    /// Drive the vCPU until it exits. Returns the raw exit struct;
    /// callers decode `reason` and (for MMIO) `exception.syndrome`.
    ///
    /// NOTE(review): this takes `&self`, so the borrow checker does not
    /// stop a second `run` while an earlier exit reference is still
    /// alive; callers should finish with the exit info before running
    /// the vCPU again.
    pub fn run(&self) -> Result<&av::hv_vcpu_exit_t> {
        // SAFETY: HVF API; `handle` came from `hv_vcpu_create`.
        let rc = unsafe { av::hv_vcpu_run(self.handle) };
        check(rc)?;
        // SAFETY: `exit` was filled in by `hv_vcpu_create` in `new` and
        // the vCPU is only destroyed when `self` is dropped.
        Ok(unsafe { &*self.exit })
    }
}

impl Drop for Vcpu {
    /// Destroy the underlying HVF vCPU. The status is discarded because
    /// `drop` has no way to report a failure.
    fn drop(&mut self) {
        // SAFETY: HVF teardown of the handle stored in this value.
        let _status = unsafe { av::hv_vcpu_destroy(self.handle) };
    }
}

/// Assert (`level == true`) or deassert (`level == false`) a GIC SPI
/// line. `intid` is the absolute SPI INTID (32+). Used by virtio
/// devices to interrupt the guest.
pub fn gic_set_spi(intid: u32, level: bool) -> Result<()> {
    // SAFETY: HVF API; plain-value arguments, intid bounds-checked by HVF.
    let rc = unsafe { av::hv_gic_set_spi(intid, level) };
    check(rc)
}

/// Capture the in-kernel GIC's full state as an opaque blob. This is
/// the only API that round-trips per-PE pending/active machinery and
/// the distributor state; per-register get/set is incomplete (Apple
/// hides interrupt routing internals). Per Apple's hv_gic_state.h.
///
/// Caller must have a live `Vm`. Costs ~1.4 ms.
pub fn gic_state_capture() -> Result<Vec<u8>> {
    // SAFETY: HVF API.
    let state = unsafe { av::hv_gic_state_create() };
    if state.is_null() {
        return Err(Error::Hv(-1));
    }
    let mut sz: usize = 0;
    // SAFETY: out-pointer local.
    let rc = unsafe { av::hv_gic_state_get_size(state, &mut sz) };
    if rc != 0 {
        // SAFETY: state is a valid object_t.
        unsafe {
            av::os_release(state as *mut _);
        }
        return Err(Error::Hv(rc as i32));
    }
    let mut buf = vec![0u8; sz];
    // SAFETY: buf has sz writable bytes.
    let rc = unsafe { av::hv_gic_state_get_data(state, buf.as_mut_ptr() as *mut _) };
    // SAFETY: state is a valid object_t.
    unsafe {
        av::os_release(state as *mut _);
    }
    if rc != 0 {
        return Err(Error::Hv(rc as i32));
    }
    Ok(buf)
}

/// Load a GIC state blob (from `gic_state_capture` or a snapshot) into
/// the in-kernel GIC. Must be called after `hv_gic_create` +
/// `hv_vcpu_create` but BEFORE any `hv_vcpu_run`.
pub fn gic_state_restore(blob: &[u8]) -> Result<()> {
    let (data, len) = (blob.as_ptr(), blob.len());
    // SAFETY: HVF API; `data`/`len` describe `blob`, which outlives the call.
    let rc = unsafe { av::hv_gic_set_state(data as *const _, len) };
    check(rc)
}

/// Memory-protection flags accepted by `hv_vm_map`.
pub mod prot {
    // Single-bit permissions.
    /// Read permission (bit 0).
    pub const READ: u64 = 0b001;
    /// Write permission (bit 1).
    pub const WRITE: u64 = 0b010;
    /// Execute permission (bit 2).
    pub const EXEC: u64 = 0b100;

    // Common combinations.
    /// Read + write.
    pub const RW: u64 = READ | WRITE;
    /// Read + execute.
    pub const RX: u64 = READ | EXEC;
    /// Read + write + execute.
    pub const RWX: u64 = RW | EXEC;
}

/// Decoded form of the `reason` field of `hv_vcpu_exit_t`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitReason {
    /// Raw value 0.
    Canceled,
    /// Raw value 1.
    Exception,
    /// Raw value 2.
    VTimerActivated,
    /// Any other raw value, carried through unchanged.
    Unknown(u32),
}

impl From<u32> for ExitReason {
    /// Translate the raw reason code. Values 0, 1 and 2 map to the
    /// named variants; everything else (including 3) becomes
    /// `Unknown` carrying the original value.
    fn from(v: u32) -> Self {
        // Indexed by the hv_vcpu_exit_reason enum (header):
        //   HV_EXIT_REASON_CANCELED         = 0
        //   HV_EXIT_REASON_EXCEPTION        = 1
        //   HV_EXIT_REASON_VTIMER_ACTIVATED = 2
        //   HV_EXIT_REASON_UNKNOWN          = 3 (not in the table)
        const KNOWN: [ExitReason; 3] = [
            ExitReason::Canceled,
            ExitReason::Exception,
            ExitReason::VTimerActivated,
        ];
        // Widening u32 -> usize; lossless on the 64-bit target this
        // file is compiled for.
        KNOWN
            .get(v as usize)
            .copied()
            .unwrap_or(ExitReason::Unknown(v))
    }
}