use crate::vmm::PiMutex;
use crate::vmm::vcpu::{SCX_EXIT_ERROR_THRESHOLD, WatchpointArm, self_arm_watchpoint};
use crate::vmm::{console, kvm, virtio_blk, virtio_console, virtio_net};
use kvm_ioctls::VcpuExit;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use vmm_sys_util::eventfd::EventFd;
/// Point-in-time register snapshot of a vCPU, captured while the guest is
/// parked (see `handle_freeze`) and serialized for freeze reporting.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub struct VcpuRegSnapshot {
    /// Guest instruction pointer (RIP on x86_64, PC on aarch64).
    pub instruction_pointer: u64,
    /// Guest stack pointer (RSP on x86_64, SP_EL1 on aarch64).
    pub stack_pointer: u64,
    /// Kernel page-table root (CR3 on x86_64, TTBR1_EL1 on aarch64).
    pub page_table_root: u64,
    /// User page-table root (TTBR0_EL1); always `None` on x86_64.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub user_page_table_root: Option<u64>,
    /// TCR_EL1 translation-control value; always `None` on x86_64.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tcr_el1: Option<u64>,
}
#[cfg(target_arch = "x86_64")]
/// Capture RIP/RSP/CR3 via `KVM_GET_REGS`/`KVM_GET_SREGS`.
///
/// Returns `None` if either ioctl fails; `get_sregs` is only attempted
/// after `get_regs` succeeds.
pub(crate) fn capture_vcpu_regs(vcpu: &mut kvm_ioctls::VcpuFd) -> Option<VcpuRegSnapshot> {
    let Ok(general) = vcpu.get_regs() else {
        return None;
    };
    let Ok(special) = vcpu.get_sregs() else {
        return None;
    };
    Some(VcpuRegSnapshot {
        instruction_pointer: general.rip,
        stack_pointer: general.rsp,
        page_table_root: special.cr3,
        user_page_table_root: None,
        tcr_el1: None,
    })
}
#[cfg(target_arch = "aarch64")]
/// Capture PC/SP_EL1/TTBR1_EL1 (plus TTBR0_EL1 and TCR_EL1 when readable)
/// via `KVM_GET_ONE_REG`.
///
/// Returns `None` only if PC or SP cannot be read; TTBR1 falls back to 0
/// and TTBR0/TCR_EL1 stay `None` on failure, preserving a best-effort
/// snapshot.
pub(crate) fn capture_vcpu_regs(vcpu: &mut kvm_ioctls::VcpuFd) -> Option<VcpuRegSnapshot> {
    // KVM_GET_ONE_REG id encoding: arch class | size | register selector.
    const KVM_REG_ARM64: u64 = 0x6000_0000_0000_0000;
    const KVM_REG_SIZE_U64: u64 = 0x0030_0000_0000_0000;
    const KVM_REG_ARM_CORE: u64 = 0x0010_0000;
    // Core-register selectors are (byte offset into struct kvm_regs) / 4.
    const SP_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | (272 / 4);
    const PC_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | (256 / 4);
    const KVM_REG_ARM64_SYSREG: u64 = 0x0013_0000;
    const TTBR0_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC100;
    const TTBR1_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC101;
    const TCR_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC102;
    // Single fallible reader for one 64-bit register — replaces five copies
    // of the same get_one_reg/from_le_bytes boilerplate.
    let mut read_u64 = |id: u64| -> Option<u64> {
        let mut buf = [0u8; 8];
        vcpu.get_one_reg(id, &mut buf)
            .ok()
            .map(|_| u64::from_le_bytes(buf))
    };
    let pc = read_u64(PC_ID)?;
    let sp = read_u64(SP_EL1_ID)?;
    let ttbr1 = read_u64(TTBR1_EL1_ID).unwrap_or(0);
    let ttbr0 = read_u64(TTBR0_EL1_ID);
    let tcr_el1 = read_u64(TCR_EL1_ID);
    Some(VcpuRegSnapshot {
        instruction_pointer: pc,
        stack_pointer: sp,
        page_table_root: ttbr1,
        user_page_table_root: ttbr0,
        tcr_el1,
    })
}
#[cfg(target_arch = "x86_64")]
/// TCR_EL1 is an aarch64-only register; the x86_64 stub always returns `None`.
pub(crate) fn read_tcr_el1(_vcpu: &mut kvm_ioctls::VcpuFd) -> Option<u64> {
    None
}
#[cfg(target_arch = "aarch64")]
/// Read TCR_EL1 via `KVM_GET_ONE_REG`; returns `None` if the ioctl fails.
pub(crate) fn read_tcr_el1(vcpu: &mut kvm_ioctls::VcpuFd) -> Option<u64> {
    const KVM_REG_ARM64: u64 = 0x6000_0000_0000_0000;
    const KVM_REG_SIZE_U64: u64 = 0x0030_0000_0000_0000;
    const KVM_REG_ARM64_SYSREG: u64 = 0x0013_0000;
    const TCR_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC102;
    let mut raw = [0u8; 8];
    match vcpu.get_one_reg(TCR_EL1_ID, &mut raw) {
        Ok(_) => Some(u64::from_le_bytes(raw)),
        Err(_) => None,
    }
}
#[cfg(target_arch = "x86_64")]
/// Read the kernel page-table root (CR3) via `KVM_GET_SREGS`;
/// returns `None` if the ioctl fails.
pub(crate) fn read_cr3(vcpu: &mut kvm_ioctls::VcpuFd) -> Option<u64> {
    match vcpu.get_sregs() {
        Ok(sregs) => Some(sregs.cr3),
        Err(_) => None,
    }
}
#[cfg(target_arch = "aarch64")]
/// aarch64 analogue of CR3: reads TTBR1_EL1 (the kernel page-table root)
/// via `KVM_GET_ONE_REG`; returns `None` if the ioctl fails.
pub(crate) fn read_cr3(vcpu: &mut kvm_ioctls::VcpuFd) -> Option<u64> {
    const KVM_REG_ARM64: u64 = 0x6000_0000_0000_0000;
    const KVM_REG_SIZE_U64: u64 = 0x0030_0000_0000_0000;
    const KVM_REG_ARM64_SYSREG: u64 = 0x0013_0000;
    const TTBR1_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC101;
    let mut raw = [0u8; 8];
    match vcpu.get_one_reg(TTBR1_EL1_ID, &mut raw) {
        Ok(_) => Some(u64::from_le_bytes(raw)),
        Err(_) => None,
    }
}
impl std::fmt::Display for VcpuRegSnapshot {
    /// Renders `ip=… sp=… ptroot=…` as zero-padded 16-digit hex, appending
    /// ` uptroot=…` only when a user page-table root was captured.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "ip=0x{:016x}", self.instruction_pointer)?;
        write!(f, " sp=0x{:016x}", self.stack_pointer)?;
        write!(f, " ptroot=0x{:016x}", self.page_table_root)?;
        match self.user_page_table_root {
            Some(uptr) => write!(f, " uptroot=0x{uptr:016x}"),
            None => Ok(()),
        }
    }
}
#[cfg(target_arch = "aarch64")]
#[allow(clippy::too_many_arguments)]
/// Route a guest MMIO write to the owning device model: serial COM1/COM2
/// windows first, then (when present) the virtio console/blk/net windows.
/// Writes outside every window are silently dropped.
pub(crate) fn dispatch_mmio_write(
    com1: &PiMutex<console::Serial>,
    com2: &PiMutex<console::Serial>,
    virtio_con: Option<&PiMutex<virtio_console::VirtioConsole>>,
    virtio_blk: Option<&PiMutex<virtio_blk::VirtioBlk>>,
    virtio_net: Option<&PiMutex<virtio_net::VirtioNet>>,
    addr: u64,
    data: &[u8],
) {
    if let Some(offset) = mmio_serial_offset(addr, kvm::SERIAL_MMIO_BASE) {
        // Serial registers are byte-wide; only the first byte is used.
        if let Some(&byte) = data.first() {
            com1.lock().inner_write(offset, byte);
        }
    } else if let Some(offset) = mmio_serial_offset(addr, kvm::SERIAL2_MMIO_BASE)
        && let Some(&byte) = data.first()
    {
        // NOTE(review): unlike the COM1 arm above, an empty-`data` write in
        // the COM2 window falls through to the virtio checks (let-chain vs
        // nested if). Harmless while the windows are disjoint — confirm.
        com2.lock().inner_write(offset, byte);
    } else if let Some(vc) = virtio_con
        && (kvm::VIRTIO_CONSOLE_MMIO_BASE
            ..kvm::VIRTIO_CONSOLE_MMIO_BASE + virtio_console::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        // Virtio devices take window-relative offsets.
        vc.lock()
            .mmio_write(addr - kvm::VIRTIO_CONSOLE_MMIO_BASE, data);
    } else if let Some(vb) = virtio_blk
        && (kvm::VIRTIO_BLK_MMIO_BASE..kvm::VIRTIO_BLK_MMIO_BASE + virtio_blk::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        vb.lock().mmio_write(addr - kvm::VIRTIO_BLK_MMIO_BASE, data);
    } else if let Some(vn) = virtio_net
        && (kvm::VIRTIO_NET_MMIO_BASE..kvm::VIRTIO_NET_MMIO_BASE + virtio_net::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        vn.lock().mmio_write(addr - kvm::VIRTIO_NET_MMIO_BASE, data);
    }
}
#[cfg(target_arch = "aarch64")]
#[allow(clippy::too_many_arguments)]
/// Route a guest MMIO read to the owning device model (serial, then virtio
/// console/blk/net). Reads from unmapped addresses fill `data` with 0xFF,
/// mimicking open-bus behavior.
pub(crate) fn dispatch_mmio_read(
    com1: &PiMutex<console::Serial>,
    com2: &PiMutex<console::Serial>,
    virtio_con: Option<&PiMutex<virtio_console::VirtioConsole>>,
    virtio_blk: Option<&PiMutex<virtio_blk::VirtioBlk>>,
    virtio_net: Option<&PiMutex<virtio_net::VirtioNet>>,
    addr: u64,
    data: &mut [u8],
) {
    if let Some(offset) = mmio_serial_offset(addr, kvm::SERIAL_MMIO_BASE) {
        // Serial registers are byte-wide; only the first byte is filled.
        if let Some(first) = data.first_mut() {
            *first = com1.lock().inner_read(offset);
        }
    } else if let Some(offset) = mmio_serial_offset(addr, kvm::SERIAL2_MMIO_BASE) {
        if let Some(first) = data.first_mut() {
            *first = com2.lock().inner_read(offset);
        }
    } else if let Some(vc) = virtio_con
        && (kvm::VIRTIO_CONSOLE_MMIO_BASE
            ..kvm::VIRTIO_CONSOLE_MMIO_BASE + virtio_console::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        // Virtio devices take window-relative offsets.
        vc.lock()
            .mmio_read(addr - kvm::VIRTIO_CONSOLE_MMIO_BASE, data);
    } else if let Some(vb) = virtio_blk
        && (kvm::VIRTIO_BLK_MMIO_BASE..kvm::VIRTIO_BLK_MMIO_BASE + virtio_blk::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        vb.lock().mmio_read(addr - kvm::VIRTIO_BLK_MMIO_BASE, data);
    } else if let Some(vn) = virtio_net
        && (kvm::VIRTIO_NET_MMIO_BASE..kvm::VIRTIO_NET_MMIO_BASE + virtio_net::VIRTIO_MMIO_SIZE)
            .contains(&addr)
    {
        vn.lock().mmio_read(addr - kvm::VIRTIO_NET_MMIO_BASE, data);
    } else {
        // Unmapped: return all-ones so the guest sees open-bus, not stale data.
        for b in data.iter_mut() {
            *b = 0xff;
        }
    }
}
#[cfg(target_arch = "aarch64")]
/// Map a guest MMIO address to a serial register offset relative to `base`.
/// Only the first 256 bytes of the window (u8-representable offsets) are
/// accepted; anything else yields `None`.
fn mmio_serial_offset(addr: u64, base: u64) -> Option<u8> {
    const MAX_REG_OFFSET: u64 = u8::MAX as u64 + 1;
    const _: () = assert!(
        kvm::SERIAL_MMIO_SIZE >= MAX_REG_OFFSET,
        "SERIAL_MMIO_SIZE must cover at least the 256-byte u8-representable \
        register window mmio_serial_offset accepts"
    );
    // checked_sub rejects addresses below the window base in one step.
    let offset = addr.checked_sub(base)?;
    if offset < MAX_REG_OFFSET {
        Some(offset as u8)
    } else {
        None
    }
}
// ESR_ELx fields used to decode KVM_EXIT_DEBUG on aarch64.
#[cfg(target_arch = "aarch64")]
const ESR_ELX_EC_WATCHPT_LOW: u32 = 0x34; // exception class: watchpoint
#[cfg(target_arch = "aarch64")]
const ESR_ELX_EC_SOFTSTP_LOW: u32 = 0x32; // exception class: software step
#[cfg(target_arch = "aarch64")]
const ESR_ELX_EC_SHIFT: u32 = 26; // EC field occupies bits [31:26]
#[cfg(target_arch = "aarch64")]
const ESR_ELX_EC_MASK: u32 = 0x3F;
/// Decode a KVM_EXIT_DEBUG and latch the watchpoint slot(s) that fired.
///
/// x86_64: the DR6 trap bits select the slot — bit 0 (DR0) is the gated
/// slot-0 freeze trigger, bits 1..=3 (DR1..DR3) map to user slots 0..=2.
/// `armed_slots` and the single-step state are unused on x86_64.
///
/// aarch64: a software-step exception class completes a pending
/// single-step; a watchpoint class matches FAR against each armed slot's
/// 4-byte window and latches the hits, then records single-step state for
/// the run loop to pass into `self_arm_watchpoint`.
pub(crate) fn dispatch_watchpoint_hit(
    watchpoint: &WatchpointArm,
    debug_arch: &kvm_bindings::kvm_debug_exit_arch,
    armed_slots: &[u64; 4],
    single_step_pending: &mut bool,
    single_step_slot: &mut usize,
) {
    #[cfg(target_arch = "x86_64")]
    {
        // Silence unused-parameter warnings; these inputs are aarch64-only.
        let _ = armed_slots;
        let _ = (&mut *single_step_pending, &mut *single_step_slot);
        let dr6 = debug_arch.dr6;
        // Low nibble of DR6: which of DR0..DR3 triggered.
        let trap_bits = (dr6 & 0xF) as u8;
        if trap_bits == 0 {
            tracing::debug!(
                dr6,
                "KVM_EXIT_DEBUG fired with no DR0..DR3 trap bit set \
                (BS/BT or spurious); not latching"
            );
            return;
        }
        // DR0 is reserved for the gated slot-0 (freeze) watchpoint.
        if trap_bits & 0x1 != 0 {
            latch_slot0_with_gate(watchpoint);
        }
        // DR1..DR3 map to user slots 0..2.
        for idx in 0..3 {
            if trap_bits & (1u8 << (idx + 1)) != 0 {
                watchpoint.latch_user_hit(idx);
            }
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        // Extract the exception class from the syndrome (bits [31:26]).
        let ec = (debug_arch.hsr >> ESR_ELX_EC_SHIFT) & ESR_ELX_EC_MASK;
        if ec == ESR_ELX_EC_SOFTSTP_LOW {
            if *single_step_pending {
                // The step we requested after a watchpoint hit has retired.
                *single_step_pending = false;
                *single_step_slot = 0;
            } else {
                tracing::debug!(
                    hsr = debug_arch.hsr,
                    "KVM_EXIT_DEBUG soft-step EC with no \
                    single-step pending; ignoring (likely \
                    spurious kernel-side step exit)"
                );
            }
            return;
        }
        if ec != ESR_ELX_EC_WATCHPT_LOW {
            tracing::debug!(
                hsr = debug_arch.hsr,
                ec,
                "KVM_EXIT_DEBUG with non-watchpoint EC; ignoring \
                (breakpoint/BRK paths are not used by ktstr)"
            );
            return;
        }
        let far = debug_arch.far;
        let mut matched_mask: u8 = 0;
        for (i, kva) in armed_slots.iter().enumerate() {
            if *kva == 0 {
                // A zero address means the slot is not armed.
                continue;
            }
            // Match within a 4-byte window starting at the armed address.
            if far >= *kva && far < kva.saturating_add(4) {
                matched_mask |= 1 << i;
                if i == 0 {
                    latch_slot0_with_gate(watchpoint);
                } else {
                    watchpoint.latch_user_hit(i - 1);
                }
            }
        }
        if matched_mask == 0 {
            tracing::debug!(
                hsr = debug_arch.hsr,
                far,
                armed = ?armed_slots,
                "KVM_EXIT_DEBUG watchpoint fired but FAR matched no \
                armed slot (possible KVM watchpoint match-distance \
                fallback or stale arm); not latching"
            );
            return;
        }
        *single_step_pending = true;
        // NOTE(review): despite the name, this stores the matched-slot
        // bitmask (matched_mask), not a slot index — confirm
        // self_arm_watchpoint expects a mask here.
        *single_step_slot = matched_mask as usize;
    }
}
/// Latch the slot-0 watchpoint hit, gated on the observed exit-kind value:
/// only values at/above `SCX_EXIT_ERROR_THRESHOLD` trigger `latch_hit()`.
fn latch_slot0_with_gate(watchpoint: &WatchpointArm) {
    let host_ptr = watchpoint.kind_host_ptr.load(Ordering::Acquire);
    if host_ptr.is_null() {
        tracing::error!(
            "latch_slot0_with_gate: kind_host_ptr null at fire time — \
            publication invariant broken (request_kva non-zero must \
            imply kind_host_ptr non-null per the Release-store \
            ordering in freeze_coord.rs::run_coord_loop). Skipping \
            slot-0 latch; the BPF .bss late-trigger fallback in the \
            freeze coordinator's poll loop remains active."
        );
        return;
    }
    // Pairs with the publisher's Release store so the pointed-to value is
    // up to date before the volatile read below.
    std::sync::atomic::fence(Ordering::Acquire);
    // SAFETY: host_ptr was checked non-null above; validity of the mapping
    // it points into is the publisher's invariant (see the error message).
    let kind = unsafe { std::ptr::read_volatile(host_ptr) };
    if kind >= SCX_EXIT_ERROR_THRESHOLD {
        watchpoint.latch_hit();
    } else {
        tracing::debug!(
            kind,
            threshold = SCX_EXIT_ERROR_THRESHOLD,
            "watchpoint fired on non-error exit_kind transition \
            (e.g. SCX_EXIT_DONE on clean shutdown); skipping \
            freeze trigger"
        );
    }
}
#[allow(clippy::too_many_arguments)]
/// Unified vCPU run loop.
///
/// Each iteration: honor `kill`, service a `freeze` request (parking in
/// `handle_freeze` until thawed), re-arm watchpoints/single-step via
/// `self_arm_watchpoint`, then KVM_RUN and dispatch the exit. Debug exits
/// go to `dispatch_watchpoint_hit`; other exits are classified by
/// `classify_exit`, with Shutdown/Fatal setting `kill` and signalling
/// `kill_evt` before breaking out.
pub(crate) fn vcpu_run_loop_unified(
    vcpu: &mut kvm_ioctls::VcpuFd,
    com1: &Arc<PiMutex<console::Serial>>,
    com2: &Arc<PiMutex<console::Serial>>,
    virtio_con: Option<&Arc<PiMutex<virtio_console::VirtioConsole>>>,
    virtio_blk: Option<&Arc<PiMutex<virtio_blk::VirtioBlk>>>,
    virtio_net: Option<&Arc<PiMutex<virtio_net::VirtioNet>>>,
    kill: &Arc<AtomicBool>,
    kill_evt: &Arc<EventFd>,
    freeze: &Arc<AtomicBool>,
    parked: &Arc<AtomicBool>,
    regs_slot: &Arc<std::sync::Mutex<Option<VcpuRegSnapshot>>>,
    watchpoint: &Arc<WatchpointArm>,
    has_immediate_exit: bool,
    parked_evt: Option<&Arc<EventFd>>,
    thaw_evt: Option<&Arc<EventFd>>,
) {
    // Watchpoint/single-step state threaded between self_arm_watchpoint
    // and dispatch_watchpoint_hit across iterations.
    let mut armed_slots: [u64; 4] = [0; 4];
    let mut arm_failures: u8 = 0;
    let mut single_step_pending: bool = false;
    let mut single_step_slot: usize = 0;
    let mut armed_single_step: bool = false;
    loop {
        if kill.load(Ordering::Acquire) {
            break;
        }
        if freeze.load(Ordering::Acquire) {
            // Park until the coordinator thaws (or kills) us.
            handle_freeze(
                vcpu,
                has_immediate_exit,
                kill,
                freeze,
                parked,
                regs_slot,
                parked_evt.map(|a| a.as_ref()),
                thaw_evt.map(|a| a.as_ref()),
                Some(kill_evt.as_ref()),
            );
            if kill.load(Ordering::Acquire) {
                break;
            }
        }
        // (Re-)arm hardware watchpoints before entering the guest.
        self_arm_watchpoint(
            vcpu,
            watchpoint,
            &mut armed_slots,
            &mut arm_failures,
            single_step_pending,
            single_step_slot,
            &mut armed_single_step,
        );
        match vcpu.run() {
            Ok(mut exit) => {
                if matches!(exit, VcpuExit::Hlt) {
                    // Idle guest: just re-check kill and go around again.
                    if kill.load(Ordering::Acquire) {
                        break;
                    }
                    continue;
                }
                if let VcpuExit::Debug(debug_arch) = &exit {
                    dispatch_watchpoint_hit(
                        watchpoint,
                        debug_arch,
                        &armed_slots,
                        &mut single_step_pending,
                        &mut single_step_slot,
                    );
                    if kill.load(Ordering::Acquire) {
                        break;
                    }
                    continue;
                }
                match classify_exit(
                    com1,
                    com2,
                    virtio_con.map(|a| a.as_ref()),
                    virtio_blk.map(|a| a.as_ref()),
                    virtio_net.map(|a| a.as_ref()),
                    &mut exit,
                ) {
                    Some(ExitAction::Continue) | None => {}
                    Some(ExitAction::Shutdown) => {
                        // Propagate shutdown to the other vCPU threads.
                        kill.store(true, Ordering::Release);
                        let _ = kill_evt.write(1);
                        break;
                    }
                    Some(ExitAction::Fatal(_)) => {
                        tracing::error!("AP fatal exit");
                        kill.store(true, Ordering::Release);
                        let _ = kill_evt.write(1);
                        break;
                    }
                }
            }
            Err(e) => {
                // EINTR/EAGAIN are expected (immediate-exit or a signal
                // interrupted KVM_RUN): clear the flag and retry.
                if e.errno() == libc::EINTR || e.errno() == libc::EAGAIN {
                    vcpu.set_kvm_immediate_exit(0);
                    if kill.load(Ordering::Acquire) {
                        break;
                    }
                    continue;
                }
                // Previously this branch retried silently, hiding a
                // persistently failing KVM_RUN; surface it before looping.
                tracing::warn!(
                    err = %e,
                    "KVM_RUN failed with unexpected errno; retrying"
                );
                if kill.load(Ordering::Acquire) {
                    break;
                }
            }
        }
        if kill.load(Ordering::Acquire) {
            break;
        }
    }
}
#[allow(clippy::too_many_arguments)]
/// Park this vCPU for a freeze.
///
/// Optionally drains the in-flight KVM_RUN via immediate-exit, snapshots
/// registers into `regs_slot`, publishes `parked` (and a `parked_evt` wake
/// edge), then blocks until `freeze` clears or `kill` is set. Clears
/// `parked` on exit so later freeze cycles see a fresh edge.
pub(crate) fn handle_freeze(
    vcpu: &mut kvm_ioctls::VcpuFd,
    has_immediate_exit: bool,
    kill: &Arc<AtomicBool>,
    freeze: &Arc<AtomicBool>,
    parked: &Arc<AtomicBool>,
    regs_slot: &Arc<std::sync::Mutex<Option<VcpuRegSnapshot>>>,
    parked_evt: Option<&EventFd>,
    thaw_evt: Option<&EventFd>,
    kill_evt: Option<&EventFd>,
) {
    if has_immediate_exit {
        // Run once with immediate-exit set so pending PIO/MMIO completion
        // is committed before we snapshot and park.
        vcpu.set_kvm_immediate_exit(1);
        if let Err(e) = vcpu.run()
            && e.errno() != libc::EINTR
        {
            tracing::warn!(
                err = %e,
                "handle_freeze: drain KVM_RUN failed with non-EINTR — \
                pending PIO/MMIO may not have committed before park"
            );
        }
        vcpu.set_kvm_immediate_exit(0);
    }
    let snapshot = capture_vcpu_regs(vcpu);
    // Recover from a poisoned mutex — a panicked reader must not wedge freeze.
    *regs_slot.lock().unwrap_or_else(|e| e.into_inner()) = snapshot;
    parked.store(true, Ordering::Release);
    if let Some(evt) = parked_evt
        && let Err(e) = evt.write(1)
    {
        if e.raw_os_error() == Some(libc::EAGAIN) {
            tracing::debug!(
                err = %e,
                "handle_freeze: parked_evt write returned EAGAIN \
                (eventfd counter saturated; benign — coordinator \
                already has a pending wake edge)"
            );
        } else {
            tracing::warn!(
                err = %e,
                "handle_freeze: parked_evt write failed with non-EAGAIN \
                errno — eventfd may be broken; freeze coordinator wake \
                falls back to epoll backstop"
            );
        }
    }
    use std::os::fd::AsRawFd;
    while freeze.load(Ordering::Acquire) {
        if kill.load(Ordering::Acquire) {
            break;
        }
        match (thaw_evt, kill_evt) {
            (Some(thaw), kev) => {
                // poll() on thaw (and kill when present) with a 100 ms
                // timeout backstop; the second entry is only counted in
                // nfds when kill_evt exists.
                let mut pfds = [
                    libc::pollfd {
                        fd: thaw.as_raw_fd(),
                        events: libc::POLLIN,
                        revents: 0,
                    },
                    libc::pollfd {
                        fd: kev.map_or(-1, |k| k.as_raw_fd()),
                        events: libc::POLLIN,
                        revents: 0,
                    },
                ];
                let nfds = if kev.is_some() { 2 } else { 1 };
                // SAFETY: pfds is a valid array of at least `nfds`
                // initialized pollfd structs for the duration of the call.
                unsafe {
                    libc::poll(pfds.as_mut_ptr(), nfds as libc::nfds_t, 100);
                }
            }
            (None, _) => {
                // No eventfd available: fall back to a coarse timed sleep.
                std::thread::park_timeout(std::time::Duration::from_millis(10));
            }
        }
    }
    parked.store(false, Ordering::Release);
}
// KVM_EXIT_SYSTEM_EVENT type codes that this VMM maps to a clean shutdown.
const KVM_SYSTEM_EVENT_SHUTDOWN: u32 = 1;
const KVM_SYSTEM_EVENT_RESET: u32 = 2;
/// What the run loop should do after `classify_exit` handles a vCPU exit.
pub(crate) enum ExitAction {
    /// Exit was serviced (I/O, MMIO, benign event); resume the guest.
    Continue,
    /// Guest requested shutdown/reset; stop the VM.
    Shutdown,
    /// Unrecoverable exit; carries FailEntry's reason code when available.
    Fatal(Option<u64>),
}
#[allow(clippy::too_many_arguments)]
/// Classify (and service) a vCPU exit.
///
/// Returns `Some(action)` for exits this VMM handles; `None` for `Hlt`
/// and any exit kind it does not recognize, leaving the caller's policy
/// to decide. Port I/O arms are x86_64-only; the serial-aware MMIO arms
/// are aarch64-only, while x86_64 MMIO only dispatches virtio windows.
pub(crate) fn classify_exit(
    com1: &PiMutex<console::Serial>,
    com2: &PiMutex<console::Serial>,
    virtio_con: Option<&PiMutex<virtio_console::VirtioConsole>>,
    virtio_blk: Option<&PiMutex<virtio_blk::VirtioBlk>>,
    virtio_net: Option<&PiMutex<virtio_net::VirtioNet>>,
    exit: &mut VcpuExit,
) -> Option<ExitAction> {
    match exit {
        #[cfg(target_arch = "x86_64")]
        VcpuExit::IoOut(port, data) => {
            // dispatch_io_out returns true only for the i8042 CPU-reset command.
            if dispatch_io_out(com1, com2, *port, data) {
                Some(ExitAction::Shutdown)
            } else {
                Some(ExitAction::Continue)
            }
        }
        #[cfg(target_arch = "x86_64")]
        VcpuExit::IoIn(port, data) => {
            dispatch_io_in(com1, com2, *port, data);
            Some(ExitAction::Continue)
        }
        #[cfg(target_arch = "aarch64")]
        VcpuExit::MmioWrite(addr, data) => {
            dispatch_mmio_write(com1, com2, virtio_con, virtio_blk, virtio_net, *addr, data);
            Some(ExitAction::Continue)
        }
        #[cfg(target_arch = "aarch64")]
        VcpuExit::MmioRead(addr, data) => {
            dispatch_mmio_read(com1, com2, virtio_con, virtio_blk, virtio_net, *addr, data);
            Some(ExitAction::Continue)
        }
        VcpuExit::Hlt => None,
        VcpuExit::Shutdown => Some(ExitAction::Shutdown),
        VcpuExit::SystemEvent(event_type, _) => {
            // Only SHUTDOWN/RESET terminate; unknown event codes continue.
            if *event_type == KVM_SYSTEM_EVENT_SHUTDOWN || *event_type == KVM_SYSTEM_EVENT_RESET {
                Some(ExitAction::Shutdown)
            } else {
                Some(ExitAction::Continue)
            }
        }
        VcpuExit::FailEntry(reason, _cpu) => Some(ExitAction::Fatal(Some(*reason))),
        VcpuExit::InternalError => Some(ExitAction::Fatal(None)),
        #[cfg(target_arch = "x86_64")]
        VcpuExit::MmioRead(addr, data) => {
            // x86_64 MMIO: try each virtio window in turn, else open-bus 0xFF.
            if let Some(vc) = virtio_con {
                let base = kvm::VIRTIO_CONSOLE_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_console::VIRTIO_MMIO_SIZE {
                    vc.lock().mmio_read(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            if let Some(vb) = virtio_blk {
                let base = kvm::VIRTIO_BLK_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_blk::VIRTIO_MMIO_SIZE {
                    vb.lock().mmio_read(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            if let Some(vn) = virtio_net {
                let base = kvm::VIRTIO_NET_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_net::VIRTIO_MMIO_SIZE {
                    vn.lock().mmio_read(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            for b in data.iter_mut() {
                *b = 0xff;
            }
            Some(ExitAction::Continue)
        }
        #[cfg(target_arch = "x86_64")]
        VcpuExit::MmioWrite(addr, data) => {
            // x86_64 MMIO write: virtio windows only; unmapped writes dropped.
            if let Some(vc) = virtio_con {
                let base = kvm::VIRTIO_CONSOLE_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_console::VIRTIO_MMIO_SIZE {
                    vc.lock().mmio_write(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            if let Some(vb) = virtio_blk {
                let base = kvm::VIRTIO_BLK_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_blk::VIRTIO_MMIO_SIZE {
                    vb.lock().mmio_write(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            if let Some(vn) = virtio_net {
                let base = kvm::VIRTIO_NET_MMIO_BASE;
                if *addr >= base && *addr < base + virtio_net::VIRTIO_MMIO_SIZE {
                    vn.lock().mmio_write(*addr - base, data);
                    return Some(ExitAction::Continue);
                }
            }
            Some(ExitAction::Continue)
        }
        _ => None,
    }
}
// i8042 keyboard-controller ports; writing 0xFE to the command port is the
// legacy CPU-reset request, which this VMM treats as a shutdown signal.
#[cfg(target_arch = "x86_64")]
const I8042_DATA_PORT: u16 = 0x60;
#[cfg(target_arch = "x86_64")]
const I8042_CMD_PORT: u16 = 0x64;
#[cfg(target_arch = "x86_64")]
const I8042_CMD_RESET_CPU: u8 = 0xFE;
#[cfg(target_arch = "x86_64")]
/// Route a port-I/O write: the i8042 reset command wins first, then the
/// two 8-byte serial port ranges. Returns `true` only for the i8042
/// CPU-reset command (the caller's shutdown signal).
fn dispatch_io_out(
    com1: &PiMutex<console::Serial>,
    com2: &PiMutex<console::Serial>,
    port: u16,
    data: &[u8],
) -> bool {
    let is_reset = port == I8042_CMD_PORT && data.first() == Some(&I8042_CMD_RESET_CPU);
    if is_reset {
        return true;
    }
    let com1_ports = console::COM1_BASE..console::COM1_BASE + 8;
    let com2_ports = console::COM2_BASE..console::COM2_BASE + 8;
    if com1_ports.contains(&port) {
        com1.lock().handle_out(port, data);
    } else if com2_ports.contains(&port) {
        com2.lock().handle_out(port, data);
    }
    false
}
#[cfg(target_arch = "x86_64")]
fn dispatch_io_in(
com1: &PiMutex<console::Serial>,
com2: &PiMutex<console::Serial>,
port: u16,
data: &mut [u8],
) {
match port {
I8042_CMD_PORT => {
if let Some(b) = data.first_mut() {
*b = 0;
}
}
I8042_DATA_PORT => {
if let Some(b) = data.first_mut() {
*b = 0;
}
}
p if (console::COM1_BASE..console::COM1_BASE + 8).contains(&p) => {
com1.lock().handle_in(port, data);
}
p if (console::COM2_BASE..console::COM2_BASE + 8).contains(&p) => {
com2.lock().handle_in(port, data);
}
_ => {}
}
}
#[cfg(all(test, target_arch = "x86_64"))]
mod tests {
    //! x86_64 port-I/O dispatch and exit-classification tests.
    //!
    //! The mod-level `cfg(all(test, target_arch = "x86_64"))` gate already
    //! restricts every test; the per-test `#[cfg(target_arch = "x86_64")]`
    //! attributes the originals carried were redundant and are removed.
    use super::*;
    #[test]
    fn dispatch_io_out_i8042_reset_is_shutdown_signal() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        assert!(
            dispatch_io_out(&com1, &com2, I8042_CMD_PORT, &[I8042_CMD_RESET_CPU]),
            "I8042 reset (0xFE to port 0x64) must signal shutdown"
        );
    }
    #[test]
    fn dispatch_io_out_i8042_non_reset() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        assert!(!dispatch_io_out(&com1, &com2, I8042_CMD_PORT, &[0x00]));
    }
    #[test]
    fn dispatch_io_out_serial_com1() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        assert!(!dispatch_io_out(&com1, &com2, console::COM1_BASE, b"A"));
    }
    #[test]
    fn dispatch_io_out_serial_com2() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        assert!(!dispatch_io_out(&com1, &com2, console::COM2_BASE, b"B"));
        let output = com2.lock().output();
        assert!(output.contains('B'));
    }
    #[test]
    fn dispatch_io_out_unknown_port() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        assert!(!dispatch_io_out(&com1, &com2, 0x1234, &[0xFF]));
    }
    #[test]
    fn dispatch_io_in_i8042_status() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut data = [0xFFu8; 1];
        dispatch_io_in(&com1, &com2, I8042_CMD_PORT, &mut data);
        assert_eq!(data[0], 0);
    }
    #[test]
    fn dispatch_io_in_i8042_data() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut data = [0xFFu8; 1];
        dispatch_io_in(&com1, &com2, I8042_DATA_PORT, &mut data);
        assert_eq!(data[0], 0);
    }
    #[test]
    fn dispatch_io_in_unknown_port() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut data = [0xFFu8; 1];
        dispatch_io_in(&com1, &com2, 0x1234, &mut data);
        assert_eq!(data[0], 0xFF, "unknown port should not modify data");
    }
    #[test]
    fn classify_exit_io_out_i8042_reset_is_shutdown() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data = [I8042_CMD_RESET_CPU];
        let mut exit = VcpuExit::IoOut(I8042_CMD_PORT, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Shutdown)),
            "IoOut(0x64, [0xFE]) — i8042 reset — must classify as Shutdown"
        );
    }
    #[test]
    fn classify_exit_io_out_serial_is_continue() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data = [b'Z'];
        let mut exit = VcpuExit::IoOut(console::COM1_BASE, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "IoOut to COM1 must classify as Continue (no reboot)"
        );
        assert!(com1.lock().output().contains('Z'));
    }
    #[test]
    fn classify_exit_io_in_serial_is_continue() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut data = [0xFFu8; 1];
        let mut exit = VcpuExit::IoIn(console::COM1_BASE, &mut data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "IoIn to COM1 must classify as Continue"
        );
    }
    #[test]
    fn classify_exit_x86_mmio_read_unmapped_returns_0xff() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut buf = [0u8; 4];
        let mut exit = VcpuExit::MmioRead(0x1000, &mut buf);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "Unmapped MMIO read must classify as Continue (not Fatal)"
        );
        assert_eq!(
            buf,
            [0xff, 0xff, 0xff, 0xff],
            "Unmapped MMIO read must fill the data buffer with 0xFF — \
            leaving stale bytes would surface as phantom guest reads"
        );
    }
    #[test]
    fn classify_exit_x86_mmio_write_unmapped_is_continue() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data = [0xAAu8, 0xBB];
        let mut exit = VcpuExit::MmioWrite(0x1000, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "Unmapped MMIO write must classify as Continue"
        );
    }
}
#[cfg(test)]
mod tests_arch_neutral {
    //! Exit-classification tests for `VcpuExit` variants that exist on
    //! every architecture (Hlt, Shutdown, SystemEvent, FailEntry,
    //! InternalError).
    use super::*;
    #[test]
    fn classify_exit_hlt_returns_none() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut exit = VcpuExit::Hlt;
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(action.is_none(), "Hlt must classify as None");
    }
    #[test]
    fn classify_exit_shutdown_variant_is_shutdown() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut exit = VcpuExit::Shutdown;
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Shutdown)),
            "Shutdown variant must classify as ExitAction::Shutdown"
        );
    }
    #[test]
    fn classify_exit_system_event_shutdown_is_shutdown() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data: [u64; 0] = [];
        let mut exit = VcpuExit::SystemEvent(KVM_SYSTEM_EVENT_SHUTDOWN, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Shutdown)),
            "SystemEvent(SHUTDOWN=1) must classify as Shutdown"
        );
    }
    #[test]
    fn classify_exit_system_event_reset_is_shutdown() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data: [u64; 0] = [];
        let mut exit = VcpuExit::SystemEvent(KVM_SYSTEM_EVENT_RESET, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Shutdown)),
            "SystemEvent(RESET=2) must classify as Shutdown"
        );
    }
    #[test]
    fn classify_exit_system_event_unknown_type_is_continue() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data: [u64; 0] = [];
        let mut exit = VcpuExit::SystemEvent(99, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "SystemEvent with unknown type must classify as Continue, \
            not Shutdown — the run loop must not terminate on \
            unknown KVM event codes"
        );
    }
    #[test]
    fn classify_exit_fail_entry_is_fatal_with_reason() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut exit = VcpuExit::FailEntry(0xdead_beef, 7);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        match action {
            Some(ExitAction::Fatal(Some(reason))) => assert_eq!(
                reason, 0xdead_beef,
                "FailEntry reason must round-trip into Fatal(Some(_))"
            ),
            other => panic!(
                "FailEntry must classify as Fatal(Some(reason)); got tag {}",
                action_tag(&other)
            ),
        }
    }
    #[test]
    fn classify_exit_internal_error_is_fatal_none() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut exit = VcpuExit::InternalError;
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Fatal(None))),
            "InternalError must classify as Fatal(None)"
        );
    }
    // Compact tag for panic messages, since ExitAction does not derive Debug.
    fn action_tag(a: &Option<ExitAction>) -> u8 {
        match a {
            None => 0,
            Some(ExitAction::Continue) => 1,
            Some(ExitAction::Shutdown) => 2,
            Some(ExitAction::Fatal(_)) => 3,
        }
    }
}
#[cfg(all(test, target_arch = "aarch64"))]
mod tests_aarch64 {
    //! aarch64 MMIO dispatch and exit-classification tests.
    use super::*;
    #[test]
    fn dispatch_mmio_read_unmapped_returns_0xff() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut buf = [0u8; 4];
        dispatch_mmio_read(
            &com1, &com2, None, None, None, 0x10_0000, &mut buf,
        );
        assert_eq!(
            buf,
            [0xff, 0xff, 0xff, 0xff],
            "Unmapped MMIO read must fill the data buffer with 0xFF"
        );
    }
    #[test]
    fn dispatch_mmio_read_serial_does_not_fill_0xff() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        // Offset 5 is the LSR register within the serial window.
        let mut buf = [0xAAu8; 1];
        dispatch_mmio_read(
            &com1,
            &com2,
            None,
            None,
            None,
            kvm::SERIAL_MMIO_BASE + 5,
            &mut buf,
        );
        assert_ne!(
            buf[0], 0xFF,
            "Serial MMIO read must invoke the COM1 LSR path, not the \
            unmapped 0xFF fallback"
        );
    }
    #[test]
    fn classify_exit_aarch64_mmio_write_serial_is_continue() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let data = [b'Q'];
        let mut exit = VcpuExit::MmioWrite(kvm::SERIAL_MMIO_BASE, &data);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "aarch64 MmioWrite to serial must classify as Continue"
        );
        assert!(
            com1.lock().output().contains('Q'),
            "Serial MMIO write must land the byte in COM1 output"
        );
    }
    #[test]
    fn classify_exit_aarch64_mmio_read_unmapped_returns_0xff() {
        let com1 = PiMutex::new(console::Serial::new(console::COM1_BASE));
        let com2 = PiMutex::new(console::Serial::new(console::COM2_BASE));
        let mut buf = [0u8; 4];
        let mut exit = VcpuExit::MmioRead(0x10_0000, &mut buf);
        let action = classify_exit(&com1, &com2, None, None, None, &mut exit);
        assert!(
            matches!(action, Some(ExitAction::Continue)),
            "Unmapped aarch64 MMIO read must classify as Continue"
        );
        assert_eq!(
            buf,
            [0xff, 0xff, 0xff, 0xff],
            "Unmapped aarch64 MMIO read must fill with 0xFF"
        );
    }
}
#[cfg(all(test, target_arch = "x86_64"))]
mod handle_freeze_tests {
    //! Integration-style tests for `handle_freeze` against a real vCPU.
    //!
    //! NOTE(review): each test constructs a `KtstrKvm` VM, which presumably
    //! requires access to /dev/kvm — confirm CI environments gate or
    //! provide it.
    use super::*;
    use std::sync::atomic::{AtomicBool, Ordering};
    #[test]
    fn handle_freeze_drain_swallows_eintr_and_resets_state() {
        use crate::vmm::kvm::KtstrKvm;
        use crate::vmm::topology::Topology;
        // Minimal 1-vCPU topology.
        let topo = Topology {
            llcs: 1,
            cores_per_llc: 1,
            threads_per_core: 1,
            numa_nodes: 1,
            nodes: None,
            distances: None,
        };
        let mut vm = KtstrKvm::new(topo, 64, false).unwrap();
        crate::vmm::x86_64::boot::setup_sregs(&vm.guest_mem, &vm.vcpus[0], false).unwrap();
        // freeze starts false, so handle_freeze should not block in its wait loop.
        let kill = std::sync::Arc::new(AtomicBool::new(false));
        let freeze = std::sync::Arc::new(AtomicBool::new(false));
        let parked = std::sync::Arc::new(AtomicBool::new(false));
        let regs_slot = std::sync::Arc::new(std::sync::Mutex::new(None));
        handle_freeze(
            &mut vm.vcpus[0],
            true, &kill,
            &freeze,
            &parked,
            &regs_slot,
            None,
            None,
            None,
        );
        assert!(
            !parked.load(Ordering::Acquire),
            "parked must be cleared on exit so subsequent freeze \
            cycles can observe a fresh true→false edge"
        );
        let snapshot = regs_slot.lock().unwrap();
        assert!(
            snapshot.is_some(),
            "capture_vcpu_regs must populate regs_slot for a freshly-\
            init'd vCPU — None means KVM_GET_REGS failed unexpectedly"
        );
    }
    #[test]
    fn handle_freeze_no_drain_when_immediate_exit_unsupported() {
        use crate::vmm::kvm::KtstrKvm;
        use crate::vmm::topology::Topology;
        let topo = Topology {
            llcs: 1,
            cores_per_llc: 1,
            threads_per_core: 1,
            numa_nodes: 1,
            nodes: None,
            distances: None,
        };
        let mut vm = KtstrKvm::new(topo, 64, false).unwrap();
        crate::vmm::x86_64::boot::setup_sregs(&vm.guest_mem, &vm.vcpus[0], false).unwrap();
        let kill = std::sync::Arc::new(AtomicBool::new(false));
        let freeze = std::sync::Arc::new(AtomicBool::new(false));
        let parked = std::sync::Arc::new(AtomicBool::new(false));
        let regs_slot = std::sync::Arc::new(std::sync::Mutex::new(None));
        // has_immediate_exit = false: the drain KVM_RUN must be skipped.
        handle_freeze(
            &mut vm.vcpus[0],
            false, &kill,
            &freeze,
            &parked,
            &regs_slot,
            None,
            None,
            None,
        );
        assert!(!parked.load(Ordering::Acquire));
        assert!(regs_slot.lock().unwrap().is_some());
    }
    #[test]
    fn handle_freeze_writes_parked_evt_edge() {
        use crate::vmm::kvm::KtstrKvm;
        use crate::vmm::topology::Topology;
        let topo = Topology {
            llcs: 1,
            cores_per_llc: 1,
            threads_per_core: 1,
            numa_nodes: 1,
            nodes: None,
            distances: None,
        };
        let mut vm = KtstrKvm::new(topo, 64, false).unwrap();
        crate::vmm::x86_64::boot::setup_sregs(&vm.guest_mem, &vm.vcpus[0], false).unwrap();
        let kill = std::sync::Arc::new(AtomicBool::new(false));
        let freeze = std::sync::Arc::new(AtomicBool::new(false));
        let parked = std::sync::Arc::new(AtomicBool::new(false));
        let regs_slot = std::sync::Arc::new(std::sync::Mutex::new(None));
        let parked_evt = EventFd::new(vmm_sys_util::eventfd::EFD_NONBLOCK).unwrap();
        handle_freeze(
            &mut vm.vcpus[0],
            false,
            &kill,
            &freeze,
            &parked,
            &regs_slot,
            Some(&parked_evt),
            None,
            None,
        );
        let counter = parked_evt.read().unwrap();
        assert_eq!(
            counter, 1,
            "handle_freeze must write exactly one wake edge to \
            parked_evt — coordinator depends on this to advance \
            from epoll_wait without spurious extra wakes"
        );
    }
}
#[cfg(test)]
mod vcpu_reg_snapshot_tests {
use super::*;
#[test]
fn vcpu_reg_snapshot_display_renders_three_hex_fields() {
    // With both optional fields None, Display must emit exactly the three
    // mandatory fields as zero-padded 16-digit hex.
    let snap = VcpuRegSnapshot {
        instruction_pointer: 0xffff_ffff_8100_1234,
        stack_pointer: 0xffff_ffff_8000_0000,
        page_table_root: 0x0123_4567_89ab_cdef,
        user_page_table_root: None,
        tcr_el1: None,
    };
    let rendered = snap.to_string();
    assert_eq!(
        rendered,
        "ip=0xffffffff81001234 sp=0xffffffff80000000 ptroot=0x0123456789abcdef"
    );
}
#[test]
fn vcpu_reg_snapshot_display_appends_user_pt_root_when_present() {
    // A Some(user_page_table_root) adds a trailing `uptroot=` field; note
    // the expected string shows tcr_el1 is NOT rendered even when Some.
    let snap = VcpuRegSnapshot {
        instruction_pointer: 0xffff_8000_8100_1234,
        stack_pointer: 0xffff_8000_8000_0000,
        page_table_root: 0x0000_4000_8000_0000,
        user_page_table_root: Some(0x0000_0000_aaaa_bbbb),
        tcr_el1: Some(0xb510_0010),
    };
    let rendered = snap.to_string();
    assert_eq!(
        rendered,
        "ip=0xffff800081001234 sp=0xffff800080000000 ptroot=0x0000400080000000 uptroot=0x00000000aaaabbbb"
    );
}
#[test]
fn vcpu_reg_snapshot_serde_round_trip() {
    // None-valued optional fields must be omitted from the JSON and come
    // back as None; the mandatory fields survive the round trip intact.
    let snap = VcpuRegSnapshot {
        tcr_el1: None,
        user_page_table_root: None,
        page_table_root: 0x3,
        stack_pointer: 0x2,
        instruction_pointer: 0x1,
    };
    let json = serde_json::to_string(&snap).expect("serialize");
    assert!(
        json.contains("\"instruction_pointer\""),
        "missing JSON key `instruction_pointer`: {json}"
    );
    assert!(
        json.contains("\"stack_pointer\""),
        "missing JSON key `stack_pointer`: {json}"
    );
    assert!(
        json.contains("\"page_table_root\""),
        "missing JSON key `page_table_root`: {json}"
    );
    assert!(
        !json.contains("\"user_page_table_root\""),
        "user_page_table_root must skip-serialize when None: {json}"
    );
    let parsed: VcpuRegSnapshot = serde_json::from_str(&json).expect("deserialize");
    assert_eq!(
        (
            parsed.instruction_pointer,
            parsed.stack_pointer,
            parsed.page_table_root
        ),
        (0x1, 0x2, 0x3)
    );
    assert!(
        parsed.user_page_table_root.is_none(),
        "missing field must deserialize as None"
    );
}
#[test]
fn vcpu_reg_snapshot_serde_round_trip_with_user_pt_root() {
    // When user_page_table_root is Some, its key must be emitted and the
    // value must survive serialize → deserialize unchanged.
    let uptroot: u64 = 0xdead_beef_cafe_d00d;
    let snap = VcpuRegSnapshot {
        instruction_pointer: 0x1,
        stack_pointer: 0x2,
        page_table_root: 0x3,
        user_page_table_root: Some(uptroot),
        tcr_el1: None,
    };
    let json = serde_json::to_string(&snap).expect("serialize");
    assert!(
        json.contains("\"user_page_table_root\""),
        "user_page_table_root must serialize when Some: {json}"
    );
    let parsed: VcpuRegSnapshot = serde_json::from_str(&json).expect("deserialize");
    assert_eq!(parsed.user_page_table_root, Some(uptroot));
}
#[test]
fn vcpu_reg_snapshot_serde_round_trip_tcr_el1() {
    // tcr_el1 must appear in the JSON only when Some, and its absence must
    // deserialize back to None. Build both variants via one helper.
    let some_val: u64 = 0x0000_0000_b510_0010;
    let make = |tcr: Option<u64>| VcpuRegSnapshot {
        instruction_pointer: 0x1,
        stack_pointer: 0x2,
        page_table_root: 0x3,
        user_page_table_root: None,
        tcr_el1: tcr,
    };
    let json_some = serde_json::to_string(&make(Some(some_val))).expect("serialize Some");
    assert!(
        json_some.contains("\"tcr_el1\""),
        "tcr_el1 must serialize when Some: {json_some}"
    );
    let json_none = serde_json::to_string(&make(None)).expect("serialize None");
    assert!(
        !json_none.contains("\"tcr_el1\""),
        "tcr_el1 must skip-serialize when None: {json_none}"
    );
    let parsed_some: VcpuRegSnapshot =
        serde_json::from_str(&json_some).expect("deserialize Some");
    assert_eq!(parsed_some.tcr_el1, Some(some_val));
    let parsed_none: VcpuRegSnapshot =
        serde_json::from_str(&json_none).expect("deserialize None");
    assert!(
        parsed_none.tcr_el1.is_none(),
        "missing tcr_el1 must deserialize as None"
    );
}
#[test]
fn vcpu_reg_snapshot_zero_renders_zeros() {
    // All-zero registers still render as full-width zero-padded hex.
    let zeroed = VcpuRegSnapshot {
        instruction_pointer: 0,
        stack_pointer: 0,
        page_table_root: 0,
        user_page_table_root: None,
        tcr_el1: None,
    };
    let expected =
        "ip=0x0000000000000000 sp=0x0000000000000000 ptroot=0x0000000000000000";
    assert_eq!(zeroed.to_string(), expected);
}
#[test]
#[cfg(target_arch = "aarch64")]
fn aarch64_register_ids_match_kernel_encoding() {
    // Rebuild the KVM_GET_ONE_REG ids from their component fields and check
    // them against the literal encodings capture_vcpu_regs hard-codes.
    const KVM_REG_ARM64: u64 = 0x6000_0000_0000_0000;
    const KVM_REG_SIZE_U64: u64 = 0x0030_0000_0000_0000;
    const KVM_REG_ARM_CORE: u64 = 0x0010_0000;
    const KVM_REG_ARM64_SYSREG: u64 = 0x0013_0000;
    const EXPECTED_PC_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | 64;
    const EXPECTED_SP_EL1_ID: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | 68;
    const EXPECTED_TTBR0_EL1_ID: u64 =
        KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC100;
    const EXPECTED_TTBR1_EL1_ID: u64 =
        KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | 0xC101;
    // Core registers are indexed in 32-bit units, i.e. byte offset / 4.
    let core_reg =
        |byte_off: u64| KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | (byte_off / 4);
    let sysreg = |enc: u64| KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG | enc;
    let pc_id = core_reg(256);
    let sp_el1_id = core_reg(272);
    let ttbr0_el1_id = sysreg(0xC100);
    let ttbr1_el1_id = sysreg(0xC101);
    assert_eq!(pc_id, EXPECTED_PC_ID, "PC_ID encoding drift");
    assert_eq!(
        sp_el1_id, EXPECTED_SP_EL1_ID,
        "SP_EL1_ID encoding drift — note offset is 272 (sp_el1), \
         not 248 (sp_el0)"
    );
    assert_eq!(
        ttbr0_el1_id, EXPECTED_TTBR0_EL1_ID,
        "TTBR0_EL1_ID encoding drift — verify (Op0=3, Op1=0, \
         CRn=2, CRm=0, Op2=0) packs to 0xC100"
    );
    assert_eq!(
        ttbr1_el1_id, EXPECTED_TTBR1_EL1_ID,
        "TTBR1_EL1_ID encoding drift — verify (Op0=3, Op1=0, \
         CRn=2, CRm=0, Op2=1) packs to 0xC101"
    );
    assert_eq!(
        ttbr1_el1_id - ttbr0_el1_id,
        1,
        "TTBR0/TTBR1 encodings should differ by exactly 1 (Op2 bit)"
    );
}
#[test]
#[cfg(target_arch = "x86_64")]
fn dr7_slot0_write_4byte_encoding() {
    // Assemble DR7 for one write-watchpoint in slot 0 covering 4 bytes,
    // then verify every bitfield against the SDM layout.
    const DR7_FIXED_1: u64 = 1 << 10;
    const DR_LOCAL_EXACT: u64 = 1 << 8;
    const DR_GLOBAL_EXACT: u64 = 1 << 9;
    const DR_LOCAL_ENABLE: u64 = 1 << 0;
    const DR_GLOBAL_ENABLE: u64 = 1 << 1;
    const DR_RW_WRITE: u64 = 0b01;
    const DR_LEN_4: u64 = 0b11;
    const SLOT0_RW_SHIFT: u32 = 16;
    const SLOT0_LEN_SHIFT: u32 = 18;
    // Build the register incrementally rather than as one expression.
    let mut dr7 = DR7_FIXED_1;
    dr7 |= DR_LOCAL_ENABLE | DR_GLOBAL_ENABLE;
    dr7 |= DR_LOCAL_EXACT | DR_GLOBAL_EXACT;
    dr7 |= DR_RW_WRITE << SLOT0_RW_SHIFT;
    dr7 |= DR_LEN_4 << SLOT0_LEN_SHIFT;
    assert_eq!(
        dr7, 0xD0703,
        "DR7 encoding for (slot=0, write, 4B) must match the production wire format"
    );
    // Each mandatory single-bit field, checked table-style.
    for (bit, why) in [
        (0u32, "L0 (bit 0) must be set"),
        (1, "G0 (bit 1) must be set"),
        (8, "LE (bit 8) must be set for data breakpoints"),
        (9, "GE (bit 9) must be set for data breakpoints"),
        (10, "DR7_FIXED_1 (bit 10) must be set"),
    ] {
        assert_ne!(dr7 & (1u64 << bit), 0, "{why}");
    }
    assert_eq!(
        (dr7 >> SLOT0_RW_SHIFT) & 0b11,
        DR_RW_WRITE,
        "slot 0 R/W field must encode write (0b01)"
    );
    assert_eq!(
        (dr7 >> SLOT0_LEN_SHIFT) & 0b11,
        DR_LEN_4,
        "slot 0 LEN field must encode 4 bytes (0b11)"
    );
    assert_eq!(
        dr7 & 0b1111_1100,
        0,
        "slots 1..3 must be disabled (L/G bits clear)"
    );
    assert_eq!(
        (dr7 >> 20) & 0xFFF,
        0,
        "slots 1..3 R/W + LEN fields must be zero"
    );
}
#[test]
#[cfg(target_arch = "aarch64")]
fn dbgwcr_slot0_write_4byte_encoding_offset0() {
    // An 8-byte-aligned KVA gives byte_offset=0, so BAS selects the four
    // low bytes of the watched doubleword.
    let kva: u64 = 0xffff_ffff_8100_1000;
    let byte_offset = (kva & 0x7u64) as u32;
    let bas: u64 = 0xFu64 << byte_offset;
    // Compose the control register from named pieces: E, PAC, LSC, BAS.
    let enable = 1u64;
    let pac_el0_el1 = 0b11u64 << 1;
    let lsc_write = 0b10u64 << 3;
    let wcr: u64 = enable | pac_el0_el1 | lsc_write | (bas << 5);
    assert_eq!(
        wcr, 0x1F7,
        "DBGWCR encoding for (slot=0, write, 4B, offset=0) must \
         match the QEMU/ARM ARM gold-standard wire format"
    );
    assert_eq!(wcr & 1, 1, "E (bit 0) must be set");
    assert_eq!(
        (wcr >> 1) & 0b11,
        0b11,
        "PAC (bits 2:1) must be 0b11 (EL0+EL1)"
    );
    assert_eq!(
        (wcr >> 3) & 0b11,
        0b10,
        "LSC (bits 4:3) must be 0b10 (write-only)"
    );
    assert_eq!(
        (wcr >> 5) & 0xFF,
        0x0F,
        "BAS (bits 12:5) must be 0x0F for offset=0 (4 \
         contiguous low bytes)"
    );
    assert_eq!(
        (wcr >> 13) & 0xF,
        0,
        "HMC + low SSC bit (bits 16:13) must be zero"
    );
    assert_eq!((wcr >> 20) & 0xF, 0, "WT + LBN must be zero");
    assert_eq!((wcr >> 24) & 0x1F, 0, "MASK must be zero");
}
#[test]
#[cfg(target_arch = "aarch64")]
fn dbgwcr_slot0_write_4byte_encoding_offset4() {
    // A KVA at doubleword offset 4 shifts BAS up to the four high bytes.
    let kva: u64 = 0xffff_ffff_8100_1004;
    let byte_offset = (kva & 0x7u64) as u32;
    let bas: u64 = 0xFu64 << byte_offset;
    let enable = 1u64;
    let pac_el0_el1 = 0b11u64 << 1;
    let lsc_write = 0b10u64 << 3;
    let wcr: u64 = enable | pac_el0_el1 | lsc_write | (bas << 5);
    assert_eq!(
        wcr, 0x1E17,
        "DBGWCR encoding for (slot=0, write, 4B, offset=4) must \
         match `0x1 | (3<<1) | (2<<3) | (0xF0 << 5)` = 0x1E17"
    );
    assert_eq!(
        (wcr >> 5) & 0xFF,
        0xF0,
        "BAS (bits 12:5) must be 0xF0 for offset=4 (4 \
         contiguous high bytes)"
    );
}
#[test]
#[cfg(target_arch = "aarch64")]
fn dbgwvr_8byte_aligned_base() {
    // DBGWVR holds the 8-byte-aligned base; BAS selects which bytes within
    // that doubleword are watched. Recover the watched [lo, hi) range from
    // the BAS bit pattern and check it covers exactly [kva, kva + 4).
    let kva: u64 = 0xffff_ffff_8100_1004;
    let wvr = kva & !0x7u64;
    assert_eq!(
        wvr, 0xffff_ffff_8100_1000,
        "DBGWVR base must clear the bottom 3 bits (8-byte align) \
         so BAS picks the 4 watched bytes within the block"
    );
    let bas: u64 = 0xFu64 << ((kva & 0x7u64) as u32);
    let watched_lo = wvr + u64::from(bas.trailing_zeros());
    let watched_hi = watched_lo + u64::from(bas.count_ones());
    assert_eq!(
        watched_lo, kva,
        "watched range low must equal the original KVA"
    );
    assert_eq!(
        watched_hi,
        kva + 4,
        "watched range high must equal kva + 4 (4 bytes)"
    );
}
#[test]
#[cfg(target_arch = "aarch64")]
fn watchpoint_slot_decode_from_far_user_slot() {
    use crate::vmm::vcpu::WatchpointArm;
    let watchpoint = WatchpointArm::new().expect("WatchpointArm::new");
    // Slot 0 (the exit_kind slot) is unarmed; user slots 1..3 watch three
    // consecutive 4-byte words.
    let armed_slots: [u64; 4] = [
        0,
        0xffff_ffff_8100_1000,
        0xffff_ffff_8100_1004,
        0xffff_ffff_8100_1008,
    ];
    // FAR equals slot 2's base address exactly.
    let far = 0xffff_ffff_8100_1004u64;
    // Synthesize an ESR whose EC field says "watchpoint from lower EL".
    let hsr = (super::ESR_ELX_EC_WATCHPT_LOW) << super::ESR_ELX_EC_SHIFT;
    let debug_arch = kvm_bindings::kvm_debug_exit_arch {
        hsr,
        hsr_high: 0,
        far,
    };
    let mut single_step_pending = false;
    // Sentinel 99 distinguishes "written by dispatch" from "left alone".
    let mut single_step_slot: usize = 99;
    super::dispatch_watchpoint_hit(
        &watchpoint,
        &debug_arch,
        &armed_slots,
        &mut single_step_pending,
        &mut single_step_slot,
    );
    // Only the slot whose range contains FAR may latch.
    assert!(
        !watchpoint.hit.load(std::sync::atomic::Ordering::Acquire),
        "slot 0 (exit_kind) must not latch when a different \
         slot's range matches FAR"
    );
    assert!(
        !watchpoint.user[0]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[0] / slot 1 must not latch — FAR is inside slot 2's \
         range"
    );
    assert!(
        watchpoint.user[1]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[1] / slot 2 must latch — FAR equals the slot's KVA"
    );
    assert!(
        !watchpoint.user[2]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[2] / slot 3 must not latch — FAR is outside its \
         range"
    );
    // The single-step bookkeeping drives the replay-avoidance protocol.
    assert!(
        single_step_pending,
        "single_step_pending must be set when a watchpoint match \
         latches; without this the next KVM_RUN replays the same \
         store and re-trips the watchpoint forever (ARM ARM \
         D2.10.5)"
    );
    assert_eq!(
        single_step_slot, 0b0100,
        "single_step_slot bitmap must encode slot 2 (bit 2 = 1, \
         0b0100) so self_arm_watchpoint clears WCR[2].E and \
         leaves WCR[0/1/3].E armed for the single-step pass"
    );
}
#[test]
#[cfg(target_arch = "aarch64")]
fn watchpoint_dispatch_ignores_non_watchpt_ec() {
    use crate::vmm::vcpu::WatchpointArm;
    // EC 0x32 (soft-step, lower EL) with no step pending: the dispatcher
    // must treat the exit as spurious and change nothing at all.
    let wp = WatchpointArm::new().expect("WatchpointArm::new");
    let slots: [u64; 4] = [
        0xffff_ffff_8100_1000,
        0xffff_ffff_8100_1004,
        0xffff_ffff_8100_1008,
        0xffff_ffff_8100_100C,
    ];
    let exit = kvm_bindings::kvm_debug_exit_arch {
        hsr: 0x32u32 << super::ESR_ELX_EC_SHIFT,
        hsr_high: 0,
        far: 0xffff_ffff_8100_1004,
    };
    let mut step_pending = false;
    let mut step_slot: usize = 99;
    super::dispatch_watchpoint_hit(&wp, &exit, &slots, &mut step_pending, &mut step_slot);
    assert!(
        !wp.hit.load(std::sync::atomic::Ordering::Acquire),
        "soft-step EC must not latch slot 0"
    );
    for (i, slot) in wp.user.iter().enumerate() {
        assert!(
            !slot.hit.load(std::sync::atomic::Ordering::Acquire),
            "soft-step EC must not latch user[{i}]"
        );
    }
    assert!(
        !step_pending,
        "spurious soft-step exit (no pending step) must leave \
         single_step_pending unchanged"
    );
    assert_eq!(
        step_slot, 99,
        "spurious soft-step exit must not clobber single_step_slot"
    );
}
#[test]
#[cfg(target_arch = "aarch64")]
fn watchpoint_softstep_clears_single_step_pending() {
    use crate::vmm::vcpu::WatchpointArm;
    // A SOFTSTP_LOW exit arriving while a step IS pending completes the
    // watchpoint replay protocol: clear the pending flag, latch nothing.
    let wp = WatchpointArm::new().expect("WatchpointArm::new");
    let slots: [u64; 4] = [0, 0xffff_ffff_8100_1000, 0, 0];
    let exit = kvm_bindings::kvm_debug_exit_arch {
        hsr: super::ESR_ELX_EC_SOFTSTP_LOW << super::ESR_ELX_EC_SHIFT,
        hsr_high: 0,
        far: 0,
    };
    let mut step_pending = true;
    let mut step_slot: usize = 1;
    super::dispatch_watchpoint_hit(&wp, &exit, &slots, &mut step_pending, &mut step_slot);
    assert!(
        !step_pending,
        "SOFTSTP_LOW with pending step must clear \
         single_step_pending so the next self_arm_watchpoint \
         call restores WCR.E=1 and drops KVM_GUESTDBG_SINGLESTEP"
    );
    assert!(
        !wp.hit.load(std::sync::atomic::Ordering::Acquire),
        "SOFTSTP_LOW must not latch slot 0 (the WATCHPT_LOW \
         exit that preceded it already did)"
    );
    for (i, slot) in wp.user.iter().enumerate() {
        assert!(
            !slot.hit.load(std::sync::atomic::Ordering::Acquire),
            "SOFTSTP_LOW must not latch user[{i}]"
        );
    }
}
#[test]
#[cfg(target_arch = "aarch64")]
fn watchpoint_slot0_skips_latch_when_host_ptr_null() {
    use crate::vmm::vcpu::WatchpointArm;
    // A fresh WatchpointArm has no published kind_host_ptr (presumably
    // null after new() — confirm against WatchpointArm::new), so a slot-0
    // FAR match must do the step bookkeeping without latching.
    let watchpoint = WatchpointArm::new().expect("WatchpointArm::new");
    // Only slot 0 (the exit_kind slot) is armed.
    let armed_slots: [u64; 4] = [0xffff_ffff_8100_1000, 0, 0, 0];
    let hsr = (super::ESR_ELX_EC_WATCHPT_LOW) << super::ESR_ELX_EC_SHIFT;
    let debug_arch = kvm_bindings::kvm_debug_exit_arch {
        hsr,
        hsr_high: 0,
        // FAR equals slot 0's armed address.
        far: 0xffff_ffff_8100_1000,
    };
    let mut single_step_pending = false;
    let mut single_step_slot: usize = 0;
    super::dispatch_watchpoint_hit(
        &watchpoint,
        &debug_arch,
        &armed_slots,
        &mut single_step_pending,
        &mut single_step_slot,
    );
    assert!(
        !watchpoint.hit.load(std::sync::atomic::Ordering::Acquire),
        "slot 0 must NOT latch hit when kind_host_ptr is null — \
         a null observation is a publication-invariant violation, \
         not a fallback trigger"
    );
    assert!(
        single_step_pending,
        "single_step_pending must be set when the FAR matches a \
         slot, regardless of slot-0 latch outcome"
    );
    assert_eq!(
        single_step_slot, 0b0001,
        "single_step_slot bitmap must include slot 0 (bit 0 = 1) \
         so self_arm_watchpoint clears WCR[0].E during the \
         single-step pass"
    );
}
#[test]
#[cfg(target_arch = "x86_64")]
fn watchpoint_dispatch_x86_dr6_b2_latches_user_slot_1() {
    use crate::vmm::vcpu::WatchpointArm;
    // DR6 = 0x4 sets only B2, which maps to user slot index 1; nothing
    // else may latch and the aarch64-only step bookkeeping must not move.
    let wp = WatchpointArm::new().expect("WatchpointArm::new");
    let slots: [u64; 4] = [0; 4];
    let exit = kvm_bindings::kvm_debug_exit_arch {
        exception: 0,
        pad: 0,
        pc: 0,
        dr6: 0x4,
        dr7: 0,
    };
    let mut step_pending = false;
    let mut step_slot: usize = 99;
    super::dispatch_watchpoint_hit(&wp, &exit, &slots, &mut step_pending, &mut step_slot);
    assert!(
        !wp.hit.load(std::sync::atomic::Ordering::Acquire),
        "slot 0 (exit_kind) must not latch when DR6 B0 is clear"
    );
    let expectations = [
        (false, "user[0] / slot 1 must not latch — DR6 B1 is clear"),
        (true, "user[1] / slot 2 must latch — DR6 B2 is set"),
        (false, "user[2] / slot 3 must not latch — DR6 B3 is clear"),
    ];
    for ((want, why), slot) in expectations.iter().zip(wp.user.iter()) {
        assert_eq!(
            slot.hit.load(std::sync::atomic::Ordering::Acquire),
            *want,
            "{why}"
        );
    }
    assert!(
        !step_pending,
        "x86 dispatch must never set single_step_pending — \
         single-step is aarch64-only"
    );
    assert_eq!(
        step_slot, 99,
        "x86 dispatch must not clobber single_step_slot — \
         single-step is aarch64-only"
    );
}
#[test]
#[cfg(target_arch = "x86_64")]
fn watchpoint_dispatch_x86_dr6_multi_match() {
    use crate::vmm::vcpu::WatchpointArm;
    let watchpoint = WatchpointArm::new().expect("WatchpointArm::new");
    // Publish an exit-kind value at the error threshold so a slot-0 (B0)
    // match is eligible to latch.
    let kind: u32 = super::SCX_EXIT_ERROR_THRESHOLD;
    let kind_box = Box::new(kind);
    // Ownership of the allocation is parked in this raw pointer for the
    // duration of the dispatch; it is reclaimed via Box::from_raw below.
    let kind_ptr = Box::into_raw(kind_box);
    watchpoint
        .kind_host_ptr
        .store(kind_ptr, std::sync::atomic::Ordering::Release);
    let armed_slots: [u64; 4] = [0; 4];
    // DR6 = 0x5: B0 (slot 0) and B2 (user slot 2 → user[1]) both set —
    // both matches must latch, not just the first in iteration order.
    let debug_arch = kvm_bindings::kvm_debug_exit_arch {
        exception: 0,
        pad: 0,
        pc: 0,
        dr6: 0x5,
        dr7: 0,
    };
    let mut single_step_pending = false;
    let mut single_step_slot: usize = 99;
    super::dispatch_watchpoint_hit(
        &watchpoint,
        &debug_arch,
        &armed_slots,
        &mut single_step_pending,
        &mut single_step_slot,
    );
    assert!(
        watchpoint.hit.load(std::sync::atomic::Ordering::Acquire),
        "slot 0 (exit_kind) must latch — DR6 B0 set + kind ≥ \
         SCX_EXIT_ERROR_THRESHOLD"
    );
    assert!(
        !watchpoint.user[0]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[0] / slot 1 must not latch — DR6 B1 is clear"
    );
    assert!(
        watchpoint.user[1]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[1] / slot 2 must latch — DR6 B2 is set, even \
         though slot 0 latched first in iteration order"
    );
    assert!(
        !watchpoint.user[2]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "user[2] / slot 3 must not latch — DR6 B3 is clear"
    );
    // Reclaim the allocation handed to kind_host_ptr above so the test
    // doesn't leak (dispatch must not have freed it).
    let _ = unsafe { Box::from_raw(kind_ptr) };
}
#[test]
fn latch_hit_is_idempotent_across_repeat_calls() {
    use crate::vmm::vcpu::WatchpointArm;
    use std::os::fd::AsRawFd;
    let watchpoint = WatchpointArm::new().expect("WatchpointArm::new");
    watchpoint.latch_hit();
    assert!(
        watchpoint.hit.load(std::sync::atomic::Ordering::Acquire),
        "first latch_hit must flip hit=false→true"
    );
    // Drain hit_evt with a raw libc::read — the EAGAIN check below relies
    // on the eventfd being non-blocking (presumably set in new(); confirm).
    let mut buf = [0u8; 8];
    let n = unsafe {
        libc::read(
            watchpoint.hit_evt.as_raw_fd(),
            buf.as_mut_ptr() as *mut libc::c_void,
            buf.len(),
        )
    };
    assert_eq!(
        n, 8,
        "first latch_hit must produce one eventfd edge \
         (8-byte counter read)"
    );
    let count_after_first = u64::from_ne_bytes(buf);
    assert_eq!(
        count_after_first, 1,
        "first latch_hit must increment counter by exactly 1"
    );
    // Re-latch while already latched: dedup means no second eventfd write.
    watchpoint.latch_hit();
    let mut buf2 = [0u8; 8];
    let n2 = unsafe {
        libc::read(
            watchpoint.hit_evt.as_raw_fd(),
            buf2.as_mut_ptr() as *mut libc::c_void,
            buf2.len(),
        )
    };
    // errno read immediately after the failing read, before anything else
    // can clobber it.
    let errno = unsafe { *libc::__errno_location() };
    assert!(
        n2 == -1 && errno == libc::EAGAIN,
        "second latch_hit on already-latched slot must NOT \
         write to hit_evt (cross-vCPU dedup); read should \
         return EAGAIN, got n={n2} errno={errno}"
    );
}
#[test]
fn latch_user_hit_is_idempotent_across_repeat_calls() {
    use crate::vmm::vcpu::WatchpointArm;
    use std::os::fd::AsRawFd;
    let watchpoint = WatchpointArm::new().expect("WatchpointArm::new");
    watchpoint.latch_user_hit(1);
    assert!(
        watchpoint.user[1]
            .hit
            .load(std::sync::atomic::Ordering::Acquire),
        "first latch_user_hit(1) must flip user[1].hit=false→true"
    );
    // Raw libc::read drains the shared hit_evt; EAGAIN below relies on the
    // eventfd being non-blocking (presumably set in new(); confirm).
    let mut buf = [0u8; 8];
    let n = unsafe {
        libc::read(
            watchpoint.hit_evt.as_raw_fd(),
            buf.as_mut_ptr() as *mut libc::c_void,
            buf.len(),
        )
    };
    assert_eq!(n, 8, "first latch_user_hit(1) must write eventfd");
    let count_after_first = u64::from_ne_bytes(buf);
    assert_eq!(count_after_first, 1, "counter increment must be 1");
    // Re-latch the same slot: dedup means no second eventfd write.
    watchpoint.latch_user_hit(1);
    let mut buf2 = [0u8; 8];
    let n2 = unsafe {
        libc::read(
            watchpoint.hit_evt.as_raw_fd(),
            buf2.as_mut_ptr() as *mut libc::c_void,
            buf2.len(),
        )
    };
    // errno read immediately after the failing read.
    let errno = unsafe { *libc::__errno_location() };
    assert!(
        n2 == -1 && errno == libc::EAGAIN,
        "second latch_user_hit(1) on already-latched slot \
         must NOT write to hit_evt; read should return \
         EAGAIN, got n={n2} errno={errno}"
    );
    // Out-of-range index must be ignored entirely — no slot changes.
    watchpoint.latch_user_hit(99);
    for (i, slot) in watchpoint.user.iter().enumerate() {
        if i == 1 {
            assert!(
                slot.hit.load(std::sync::atomic::Ordering::Acquire),
                "user[1].hit must remain latched"
            );
        } else {
            assert!(
                !slot.hit.load(std::sync::atomic::Ordering::Acquire),
                "user[{i}].hit must remain unlatched after \
                 out-of-range latch_user_hit(99)"
            );
        }
    }
}
#[test]
fn mark_armed_flips_gate_and_is_idempotent() {
    use crate::vmm::vcpu::WatchpointArm;
    // The any_armed gate starts at 0, flips to 1 on the first mark_armed,
    // and stays at 1 on repeat calls (store, not increment).
    let wp = WatchpointArm::new().expect("WatchpointArm::new");
    let gate = |w: &WatchpointArm| w.any_armed.load(std::sync::atomic::Ordering::Relaxed);
    assert_eq!(
        gate(&wp),
        0,
        "newly-constructed WatchpointArm must have any_armed=0 \
         so self_arm_watchpoint short-circuits before any \
         publisher fires"
    );
    wp.mark_armed();
    assert_eq!(gate(&wp), 1, "first mark_armed call must flip the gate to 1");
    wp.mark_armed();
    assert_eq!(
        gate(&wp),
        1,
        "second mark_armed call must leave the gate at 1 \
         (idempotent — mark_armed is `store(1)`, not `+= 1`)"
    );
}
}