use alloc::collections::VecDeque;
use core::{
arch::naked_asm,
fmt::{Debug, Formatter, Result},
mem::size_of,
};
use ax_errno::{AxResult, ax_err, ax_err_type};
use ax_memory_addr::AddrRange;
use axdevice_base::{BaseDeviceOps, SysRegAddrRange};
use axvcpu::{
AccessWidth, AxArchVCpu, AxVCpuExitReason, GuestPhysAddr, HostPhysAddr, MappingFlags,
NestedPageFaultInfo, Port, SysRegAddr, VCpuId, VMId,
};
use axvm_types::GuestVirtAddr;
use bit_field::BitField;
use raw_cpuid::CpuId;
use x86::{
bits64::vmx,
controlregs::Xcr0,
dtables::{self, DescriptorTablePointer},
segmentation::SegmentSelector,
};
use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags, EferFlags};
use x86_vlapic::EmulatedLocalApic;
use super::{
VmxExitInfo, as_axerr,
definitions::VmxExitReason,
structs::{IOBitmap, MsrBitmap, VmxRegion},
vmcs::{
self, ApicAccessExitType, VmcsControl32, VmcsControl64, VmcsControlNW, VmcsGuest16,
VmcsGuest32, VmcsGuest64, VmcsGuestNW, VmcsHost16, VmcsHost32, VmcsHost64, VmcsHostNW,
},
};
use crate::{
X86VCpuSetupConfig, ept::GuestPageWalkInfo, host, msr::Msr, regs::GeneralRegisters,
restore_host_interrupt_flag, x86_real_mode_entry_state, xstate::XState,
};
/// Value written to the VMCS VMX-preemption timer field, both at guest setup
/// and when the timer is re-armed.
const VMX_PREEMPTION_TIMER_SET_VALUE: u32 = 100_000;
/// I/O port that `setup_io_bitmap` always marks as intercepted.
// NOTE(review): presumably QEMU's exit/power-off port — confirm against the I/O exit handler.
const QEMU_EXIT_PORT: u16 = 0x604;
// NOTE(review): not referenced in the visible part of this file; presumably the value a guest
// writes to `QEMU_EXIT_PORT` to request an exit — confirm.
const QEMU_EXIT_MAGIC: u64 = 0x2000;
/// First I/O port of the PIT port range that is always intercepted.
const X86_PIT_PORT_BASE: u16 = 0x40;
/// Number of consecutive ports intercepted starting at `X86_PIT_PORT_BASE`.
const X86_PIT_PORT_COUNT: u32 = 4;
/// Single I/O port (PIT speaker port, going by the name) that is always intercepted.
const X86_PIT_SPEAKER_PORT: u16 = 0x61;
/// First I/O port of the COM1 range, intercepted only when `emulate_com1` is set.
const X86_COM1_PORT_BASE: u16 = 0x3f8;
/// Number of consecutive ports intercepted starting at `X86_COM1_PORT_BASE`.
const X86_COM1_PORT_COUNT: u32 = 8;
/// Guest-physical base address used to rebuild the address of an APIC-access VM exit
/// (base + register offset).
pub const X86_APIC_ACCESS_GPA: usize = 0xfee0_0000;
/// Guest-physical base of the window checked by `decode_ept_mmio_access`.
const X86_IOAPIC_BASE: usize = 0xfec0_0000;
/// Size in bytes of the window starting at `X86_IOAPIC_BASE`.
const X86_IOAPIC_SIZE: usize = 0x1000;
/// Operating mode of the guest CPU, as classified by `VmxVcpu::get_cpu_mode`.
#[derive(Debug, PartialEq, Eq)]
pub enum VmCpuMode {
    /// Neither long mode nor protected mode is active.
    Real,
    /// `CR0.PE` is set and long mode is not active.
    Protected,
    /// Long mode is active but the code segment is not a 64-bit one.
    Compatibility,
    /// Long mode is active and the code segment has its 64-bit flag set.
    Mode64,
}
/// Bit 10 of `IA32_EFER`; tested by `get_cpu_mode` to detect active long mode.
const MSR_IA32_EFER_LMA_BIT: u64 = 1 << 10;
/// Bit 0 of `CR0`; tested by `get_cpu_mode` to tell protected mode from real mode.
const CR0_PE: usize = 1 << 0;
/// An event queued for injection into the guest on a later VM entry.
#[derive(Clone, Copy, Debug)]
struct PendingEvent {
// Vector number; vectors below 32 are injected without checking guest interruptibility.
vector: u8,
// Optional error code handed to `vmcs::inject_event` together with the vector.
err_code: Option<u32>,
// Forwarded to the virtual local APIC when a vector >= 32 is accepted.
level_triggered: bool,
}
/// State of one VMX virtual CPU.
///
/// The layout is `#[repr(C)]` because the VM entry/exit assembly addresses
/// `host_stack_top` and `host_rflags` by fixed offsets right after `guest_regs`;
/// do not reorder the first three fields.
#[repr(C)]
pub struct VmxVcpu {
// Guest general-purpose registers; must stay the first field (asm uses `self` as its base).
guest_regs: GeneralRegisters,
// Saved host stack pointer, located at offset `size_of::<GeneralRegisters>()`.
host_stack_top: u64,
// Saved host RFLAGS, located right after `host_stack_top`.
host_rflags: u64,
// Whether the first VM entry already happened (selects VMRESUME over VMLAUNCH).
launched: bool,
// NOTE(review): initialised to `None` and not written in the visible code — confirm usage.
entry: Option<GuestPhysAddr>,
// NOTE(review): initialised to `None` and not written in the visible code — confirm usage.
ept_root: Option<HostPhysAddr>,
// VMCS region backing this vCPU.
vmcs: VmxRegion,
// I/O bitmaps selecting which port accesses cause a VM exit.
io_bitmap: IOBitmap,
// MSR bitmap selecting which MSR accesses cause a VM exit.
msr_bitmap: MsrBitmap,
// FIFO of events waiting to be injected into the guest.
pending_events: VecDeque<PendingEvent>,
// Emulated local APIC of this vCPU.
vlapic: EmulatedLocalApic,
// Extended-state bookkeeping switched around every VM entry/exit.
xstate: XState,
// Snapshot of `guest_regs` taken right after a VM exit, used only for tracing diffs.
#[cfg(feature = "tracing")]
guest_regs_exiting: GeneralRegisters,
}
impl VmxVcpu {
/// Creates a vCPU that has not been launched yet.
///
/// Allocates the VMCS region plus pass-through I/O and MSR bitmaps and
/// creates the emulated local APIC for `(vm_id, vcpu_id)`.
///
/// # Errors
/// Propagates any failure from allocating the VMCS region or the bitmaps.
pub fn new(vm_id: VMId, vcpu_id: VCpuId) -> AxResult<Self> {
    let revision = super::read_vmcs_revision_id();
    // Fallible allocations first, in the same order as the struct fields.
    let vmcs = VmxRegion::new(revision, false)?;
    let io_bitmap = IOBitmap::passthrough_all()?;
    let msr_bitmap = MsrBitmap::passthrough_all()?;
    let this = Self {
        guest_regs: GeneralRegisters::default(),
        host_stack_top: 0,
        host_rflags: 0,
        launched: false,
        entry: None,
        ept_root: None,
        vmcs,
        io_bitmap,
        msr_bitmap,
        pending_events: VecDeque::with_capacity(8),
        vlapic: EmulatedLocalApic::new(vm_id, vcpu_id),
        xstate: XState::new(),
        #[cfg(feature = "tracing")]
        guest_regs_exiting: GeneralRegisters::default(),
    };
    info!("[HV] created VmxVcpu(vmcs: {:#x})", this.vmcs.phys_addr());
    Ok(this)
}
/// Initialises the VMCS for a guest starting at `entry` with the given EPT
/// root, using the default [`X86VCpuSetupConfig`].
pub fn setup(&mut self, ept_root: HostPhysAddr, entry: GuestPhysAddr) -> AxResult {
    let config = X86VCpuSetupConfig::default();
    self.setup_vmcs(entry, ept_root, config)
}
/// Makes this vCPU's VMCS current on the executing processor (`VMPTRLD`)
/// and refreshes the host-state area from the current host registers.
pub fn bind_to_current_processor(&self) -> AxResult {
    let vmcs_paddr = self.vmcs.phys_addr();
    debug!(
        "VmxVcpu bind to current processor vmcs @ {:#x}",
        vmcs_paddr
    );
    let loaded = unsafe { vmx::vmptrld(vmcs_paddr.as_usize() as u64) };
    loaded.map_err(as_axerr)?;
    self.setup_vmcs_host()
}
/// Detaches this vCPU's VMCS from the executing processor (`VMCLEAR`).
pub fn unbind_from_current_processor(&self) -> AxResult {
    let vmcs_paddr = self.vmcs.phys_addr();
    debug!(
        "VmxVcpu unbind from current processor vmcs @ {:#x}",
        vmcs_paddr
    );
    let cleared = unsafe { vmx::vmclear(vmcs_paddr.as_usize() as u64) };
    cleared.map_err(as_axerr)?;
    Ok(())
}
pub fn get_cpu_mode(&self) -> VmCpuMode {
let ia32_efer = Msr::IA32_EFER.read();
let cs_access_right = VmcsGuest32::CS_ACCESS_RIGHTS.read().unwrap();
let cr0 = VmcsGuestNW::CR0.read().unwrap();
if (ia32_efer & MSR_IA32_EFER_LMA_BIT) != 0 {
if (cs_access_right & 0x2000) != 0 {
VmCpuMode::Mode64
} else {
VmCpuMode::Compatibility
}
} else if (cr0 & CR0_PE) != 0 {
VmCpuMode::Protected
} else {
VmCpuMode::Real
}
}
/// Enters the guest once and processes the resulting VM exit.
///
/// Returns `Ok(None)` when the exit was fully handled by
/// `builtin_vmexit_handler`, or `Ok(Some(info))` when the caller has to
/// handle it.
///
/// # Panics
/// Panics if the exit information cannot be read, if writing the host RSP
/// field fails, or if a built-in handler reports an error.
pub fn inner_run(&mut self) -> AxResult<Option<VmxExitInfo>> {
// Inject at most one queued event before entering the guest.
self.inject_pending_events()?;
self.load_guest_xstate();
#[cfg(feature = "tracing")]
{
use crate::regs::GeneralRegistersDiff;
// Report which guest registers were modified while the previous exit was handled.
let diff = GeneralRegistersDiff::new(self.guest_regs_exiting, self.guest_regs);
if !diff.is_same() {
debug!("VCpu registers changed during handling VM-exit: {diff:#x?}");
} else {
debug!("VCpu registers unchanged during handling VM-exit");
}
}
unsafe {
if self.launched {
self.vmx_resume();
} else {
self.launched = true;
// On VM exit the host RSP points at `host_stack_top`, i.e. just past `guest_regs`,
// which is what the `vmx_exit` stub relies on.
VmcsHostNW::RSP
.write(&self.host_stack_top as *const _ as usize)
.unwrap();
self.vmx_launch();
}
}
// Back in the host: restore host extended state and the saved interrupt flag.
self.load_host_xstate();
restore_host_interrupt_flag(self.host_rflags);
#[cfg(feature = "tracing")]
{
self.guest_regs_exiting = self.guest_regs;
}
let exit_info = self.exit_info().unwrap();
match self.builtin_vmexit_handler(&exit_info) {
Some(result) => match result {
Ok(()) => Ok(None),
Err(err) => {
panic!(
"VmxVcpu failed to handle a VM-exit that should be handled by itself: \
{:?}, error {:?}, vcpu: {:#x?}",
exit_info.exit_reason, err, self
);
}
},
// Not a built-in exit: hand it to the caller.
None => Ok(Some(exit_info)),
}
}
/// Returns the VM-exit information, as produced by `vmcs::exit_info`.
pub fn exit_info(&self) -> AxResult<vmcs::VmxExitInfo> {
vmcs::exit_info()
}
/// Returns the raw interruption information of the exit (`vmcs::raw_interrupt_exit_info`).
pub fn raw_interrupt_exit_info(&self) -> AxResult<u32> {
vmcs::raw_interrupt_exit_info()
}
/// Returns the decoded interruption information of the exit (`vmcs::interrupt_exit_info`).
pub fn interrupt_exit_info(&self) -> AxResult<vmcs::VmxInterruptInfo> {
vmcs::interrupt_exit_info()
}
/// Returns the I/O instruction exit information (`vmcs::io_exit_info`).
pub fn io_exit_info(&self) -> AxResult<vmcs::VmxIoExitInfo> {
vmcs::io_exit_info()
}
/// Returns the EPT-violation information (`vmcs::ept_violation_info`).
pub fn nested_page_fault_info(&self) -> AxResult<NestedPageFaultInfo> {
vmcs::ept_violation_info()
}
/// Returns the APIC-access exit information (`vmcs::apic_access_exit_info`).
pub fn apic_access_exit_info(&self) -> AxResult<vmcs::ApicAccessExitInfo> {
vmcs::apic_access_exit_info()
}
/// Shared access to the saved guest general-purpose registers.
pub fn regs(&self) -> &GeneralRegisters {
&self.guest_regs
}
/// Mutable access to the saved guest general-purpose registers.
pub fn regs_mut(&mut self) -> &mut GeneralRegisters {
&mut self.guest_regs
}
/// Reads the guest RSP from the VMCS. Panics if the VMCS read fails.
pub fn stack_pointer(&self) -> usize {
VmcsGuestNW::RSP.read().unwrap()
}
/// Writes the guest RSP into the VMCS. Panics if the VMCS write fails.
pub fn set_stack_pointer(&mut self, rsp: usize) {
VmcsGuestNW::RSP.write(rsp).unwrap()
}
/// Adds the guest CS base to `guest_rip`, except in 64-bit mode where the
/// base is taken as zero.
///
/// # Panics
/// Panics if a VMCS read fails.
pub fn gla2gva(&self, guest_rip: GuestVirtAddr) -> GuestVirtAddr {
    let cs_base = match self.get_cpu_mode() {
        VmCpuMode::Mode64 => 0,
        _ => VmcsGuestNW::CS_BASE.read().unwrap(),
    };
    guest_rip + cs_base
}
pub fn get_ptw_info(&self) -> GuestPageWalkInfo {
let top_entry = VmcsGuestNW::CR3.read().unwrap();
let level = self.get_paging_level();
let is_write_access = false;
let is_inst_fetch = false;
let is_user_mode_access = ((VmcsGuest32::SS_ACCESS_RIGHTS.read().unwrap() >> 5) & 0x3) == 3;
let mut pse = true;
let mut nxe =
(VmcsGuest64::IA32_EFER.read().unwrap() & EferFlags::NO_EXECUTE_ENABLE.bits()) != 0;
let wp = (VmcsGuestNW::CR0.read().unwrap() & Cr0Flags::WRITE_PROTECT.bits() as usize) != 0;
let is_smap_on = (VmcsGuestNW::CR4.read().unwrap()
& Cr4Flags::SUPERVISOR_MODE_ACCESS_PREVENTION.bits() as usize)
!= 0;
let is_smep_on = (VmcsGuestNW::CR4.read().unwrap()
& Cr4Flags::SUPERVISOR_MODE_EXECUTION_PROTECTION.bits() as usize)
!= 0;
let width: u32;
if level == 4 || level == 3 {
width = 9;
} else if level == 2 {
width = 10;
pse = VmcsGuestNW::CR4.read().unwrap() & Cr4Flags::PAGE_SIZE_EXTENSION.bits() as usize
!= 0;
nxe = false;
} else {
width = 0;
}
GuestPageWalkInfo {
top_entry,
level,
width,
is_user_mode_access,
is_write_access,
is_inst_fetch,
pse,
wp,
nxe,
is_smap_on,
is_smep_on,
}
}
/// Reads the guest RIP from the VMCS. Panics if the VMCS read fails.
pub fn rip(&self) -> usize {
VmcsGuestNW::RIP.read().unwrap()
}
/// Reads the guest CS selector from the VMCS. Panics if the VMCS read fails.
pub fn cs(&self) -> u16 {
VmcsGuest16::CS_SELECTOR.read().unwrap()
}
/// Moves the guest RIP forward by `instr_len` bytes.
///
/// # Errors
/// Propagates a failing VMCS read or write.
pub fn advance_rip(&mut self, instr_len: u8) -> AxResult {
VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)
}
pub fn queue_event(&mut self, vector: u8, err_code: Option<u32>) {
self.pending_events.push_back(PendingEvent {
vector,
err_code,
level_triggered: false,
});
}
/// Appends an event with an explicit trigger mode to the back of the
/// pending-event queue.
pub fn queue_event_with_trigger(
    &mut self,
    vector: u8,
    err_code: Option<u32>,
    level_triggered: bool,
) {
    let event = PendingEvent {
        vector,
        err_code,
        level_triggered,
    };
    self.pending_events.push_back(event);
}
/// Sets or clears the interrupt-window-exiting bit in the primary
/// processor-based execution controls.
///
/// # Errors
/// Propagates a failing VMCS read or write.
pub fn set_interrupt_window(&mut self, enable: bool) -> AxResult {
    let current = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?;
    let window_bit = vmcs::controls::PrimaryControls::INTERRUPT_WINDOW_EXITING.bits();
    let updated = if enable {
        current | window_bit
    } else {
        current & !window_bit
    };
    VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(updated)?;
    Ok(())
}
/// Enables or disables interception for `count` I/O ports starting at `port_base`.
pub fn set_io_intercept_of_range(&mut self, port_base: u32, count: u32, intercept: bool) {
self.io_bitmap
.set_intercept_of_range(port_base, count, intercept)
}
/// Enables or disables both read and write interception for the single MSR `msr`.
// NOTE(review): despite the name, only one MSR is affected per call.
pub fn set_msr_intercept_of_range(&mut self, msr: u32, intercept: bool) {
self.msr_bitmap.set_read_intercept(msr, intercept);
self.msr_bitmap.set_write_intercept(msr, intercept);
}
}
impl VmxVcpu {
/// Marks the I/O ports that must always trap: the QEMU exit port, the PIT
/// ports and the PIT speaker port, plus the COM1 range when
/// `config.emulate_com1` is set.
fn setup_io_bitmap(&mut self, config: X86VCpuSetupConfig) -> AxResult {
    let bitmap = &mut self.io_bitmap;
    // A single port: QEMU_EXIT_PORT..QEMU_EXIT_PORT + 1.
    bitmap.set_intercept_of_range(QEMU_EXIT_PORT as u32, 1, true);
    bitmap.set_intercept_of_range(X86_PIT_PORT_BASE as u32, X86_PIT_PORT_COUNT, true);
    bitmap.set_intercept(X86_PIT_SPEAKER_PORT as u32, true);
    if config.emulate_com1 {
        bitmap.set_intercept_of_range(X86_COM1_PORT_BASE as u32, X86_COM1_PORT_COUNT, true);
    }
    Ok(())
}
/// Marks the MSRs whose reads and writes must trap: `IA32_APIC_BASE`,
/// `IA32_UMWAIT_CONTROL` and the range `0x800..=0x83f`.
#[allow(dead_code)]
fn setup_msr_bitmap(&mut self) -> AxResult {
    const IA32_APIC_BASE: u32 = 0x1b;
    const IA32_UMWAIT_CONTROL: u32 = 0xe1;
    let trapped = [IA32_APIC_BASE, IA32_UMWAIT_CONTROL]
        .into_iter()
        .chain(0x800..=0x83f);
    for msr in trapped {
        self.msr_bitmap.set_read_intercept(msr, true);
        self.msr_bitmap.set_write_intercept(msr, true);
    }
    Ok(())
}
/// Clears the VMCS, makes it current, programs the MSR bitmap, guest state
/// and control fields, then releases the VMCS again.
///
/// # Errors
/// Stops at and returns the first failing step.
fn setup_vmcs(
    &mut self,
    entry: GuestPhysAddr,
    ept_root: HostPhysAddr,
    config: X86VCpuSetupConfig,
) -> AxResult {
    let vmcs_paddr = self.vmcs.phys_addr().as_usize() as u64;
    let cleared = unsafe { vmx::vmclear(vmcs_paddr) };
    cleared.map_err(as_axerr)?;

    self.bind_to_current_processor()?;
    self.setup_msr_bitmap()?;
    self.setup_vmcs_guest(entry)?;
    self.setup_vmcs_control(ept_root, true, config)?;
    self.unbind_from_current_processor()
}
/// Fills the VMCS host-state area from the registers of the processor this
/// code is currently running on.
///
/// # Errors
/// Propagates the first failing VMCS write.
fn setup_vmcs_host(&self) -> AxResult {
// MSRs and control registers are copied from the live host values.
VmcsHost64::IA32_PAT.write(Msr::IA32_PAT.read())?;
VmcsHost64::IA32_EFER.write(Msr::IA32_EFER.read())?;
VmcsHostNW::CR0.write(Cr0::read_raw() as _)?;
VmcsHostNW::CR3.write(Cr3::read_raw().0.start_address().as_u64() as _)?;
VmcsHostNW::CR4.write(Cr4::read_raw() as _)?;
// Segment selectors and the FS/GS bases.
VmcsHost16::ES_SELECTOR.write(x86::segmentation::es().bits())?;
VmcsHost16::CS_SELECTOR.write(x86::segmentation::cs().bits())?;
VmcsHost16::SS_SELECTOR.write(x86::segmentation::ss().bits())?;
VmcsHost16::DS_SELECTOR.write(x86::segmentation::ds().bits())?;
VmcsHost16::FS_SELECTOR.write(x86::segmentation::fs().bits())?;
VmcsHost16::GS_SELECTOR.write(x86::segmentation::gs().bits())?;
VmcsHostNW::FS_BASE.write(Msr::IA32_FS_BASE.read() as _)?;
VmcsHostNW::GS_BASE.write(Msr::IA32_GS_BASE.read() as _)?;
// Task register and descriptor tables; the TR base is looked up via `get_tr_base`.
let tr = unsafe { x86::task::tr() };
let mut gdtp = DescriptorTablePointer::<u64>::default();
let mut idtp = DescriptorTablePointer::<u64>::default();
unsafe {
dtables::sgdt(&mut gdtp);
dtables::sidt(&mut idtp);
}
VmcsHost16::TR_SELECTOR.write(tr.bits())?;
VmcsHostNW::TR_BASE.write(get_tr_base(tr, &gdtp) as _)?;
VmcsHostNW::GDTR_BASE.write(gdtp.base as _)?;
VmcsHostNW::IDTR_BASE.write(idtp.base as _)?;
// Every VM exit resumes host execution at the `vmx_exit` stub.
VmcsHostNW::RIP.write(Self::vmx_exit as *const () as usize)?;
VmcsHostNW::IA32_SYSENTER_ESP.write(0)?;
VmcsHostNW::IA32_SYSENTER_EIP.write(0)?;
VmcsHost32::IA32_SYSENTER_CS.write(0)?;
Ok(())
}
/// Programs the VMCS guest-state area so the guest starts at the state
/// returned by `x86_real_mode_entry_state(entry)`.
///
/// # Errors
/// Propagates the first failing VMCS write.
///
/// # Panics
/// Panics if `set_cr` fails to write a guest control register.
fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr) -> AxResult {
let entry_state = x86_real_mode_entry_state(entry);
let cr0_val: Cr0Flags =
Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE | Cr0Flags::EXTENSION_TYPE;
self.set_cr(0, cr0_val.bits());
self.set_cr(4, 0);
// Writes selector 0, base 0, limit 0xffff and the given access rights for one segment.
macro_rules! set_guest_segment {
($seg:ident, $access_rights:expr) => {{
use VmcsGuest16::*;
use VmcsGuest32::*;
use VmcsGuestNW::*;
paste::paste! {
[<$seg _SELECTOR>].write(0)?;
[<$seg _BASE>].write(0)?;
[<$seg _LIMIT>].write(0xffff)?;
[<$seg _ACCESS_RIGHTS>].write($access_rights)?;
}
}};
}
// CS gets the generic defaults first and is then overridden with the entry selector/base.
set_guest_segment!(ES, 0x93); set_guest_segment!(CS, 0x9b); VmcsGuest16::CS_SELECTOR.write(entry_state.cs_selector)?;
VmcsGuestNW::CS_BASE.write(entry_state.cs_base)?;
set_guest_segment!(SS, 0x93);
set_guest_segment!(DS, 0x93);
set_guest_segment!(FS, 0x93);
set_guest_segment!(GS, 0x93);
set_guest_segment!(TR, 0x8b); set_guest_segment!(LDTR, 0x82);
VmcsGuestNW::GDTR_BASE.write(0)?;
VmcsGuest32::GDTR_LIMIT.write(0xffff)?;
VmcsGuestNW::IDTR_BASE.write(0)?;
VmcsGuest32::IDTR_LIMIT.write(0xffff)?;
VmcsGuestNW::CR3.write(0)?;
VmcsGuestNW::DR7.write(0x400)?;
VmcsGuestNW::RSP.write(0)?;
VmcsGuestNW::RIP.write(entry_state.rip)?;
VmcsGuestNW::RFLAGS.write(0x2)?;
VmcsGuestNW::PENDING_DBG_EXCEPTIONS.write(0)?;
VmcsGuestNW::IA32_SYSENTER_ESP.write(0)?;
VmcsGuestNW::IA32_SYSENTER_EIP.write(0)?;
VmcsGuest32::IA32_SYSENTER_CS.write(0)?;
VmcsGuest32::INTERRUPTIBILITY_STATE.write(0)?;
VmcsGuest32::ACTIVITY_STATE.write(0)?;
VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(VMX_PREEMPTION_TIMER_SET_VALUE)?;
VmcsGuest64::LINK_PTR.write(u64::MAX)?; VmcsGuest64::IA32_DEBUGCTL.write(0)?;
// The guest starts with the host's PAT value and a cleared EFER.
VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?;
VmcsGuest64::IA32_EFER.write(0)?;
Ok(())
}
/// Programs the VMCS control fields: pin-based, primary and secondary
/// processor-based, VM-exit and VM-entry controls, the EPT pointer, the
/// exception bitmap and the I/O, MSR and APIC related addresses.
///
/// `is_guest == false` additionally sets the IA-32e-mode-guest entry
/// control. `config` is forwarded to `setup_io_bitmap`.
///
/// # Errors
/// Propagates the first failing control setup or VMCS write.
fn setup_vmcs_control(
&mut self,
ept_root: HostPhysAddr,
is_guest: bool,
config: X86VCpuSetupConfig,
) -> AxResult {
use PinbasedControls as PinCtrl;
use super::vmcs::controls::*;
let raw_cpuid = CpuId::new();
// Pin-based controls: exit on NMIs and external interrupts, enable the preemption timer.
vmcs::set_control(
VmcsControl32::PINBASED_EXEC_CONTROLS,
Msr::IA32_VMX_TRUE_PINBASED_CTLS,
Msr::IA32_VMX_PINBASED_CTLS.read() as u32,
(PinCtrl::NMI_EXITING
| PinCtrl::EXTERNAL_INTERRUPT_EXITING
| PinCtrl::VMX_PREEMPTION_TIMER)
.bits(),
0,
)?;
use PrimaryControls as CpuCtrl;
// Primary controls: the first mask is requested on, the second mask is requested off.
vmcs::set_control(
VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS,
Msr::IA32_VMX_TRUE_PROCBASED_CTLS,
Msr::IA32_VMX_PROCBASED_CTLS.read() as u32,
(CpuCtrl::USE_IO_BITMAPS
| CpuCtrl::USE_MSR_BITMAPS
| CpuCtrl::USE_TPR_SHADOW
| CpuCtrl::SECONDARY_CONTROLS)
.bits(),
(CpuCtrl::CR3_LOAD_EXITING
| CpuCtrl::CR3_STORE_EXITING
| CpuCtrl::CR8_LOAD_EXITING
| CpuCtrl::CR8_STORE_EXITING)
.bits(),
)?;
use SecondaryControls as CpuCtrl2;
let mut val = CpuCtrl2::VIRTUALIZE_APIC
| CpuCtrl2::VIRTUAL_INTERRUPT_DELIVERY
| CpuCtrl2::ENABLE_EPT
| CpuCtrl2::UNRESTRICTED_GUEST;
// Optional secondary controls are enabled only when the host CPU reports the feature.
if let Some(features) = raw_cpuid.get_extended_processor_and_feature_identifiers()
&& features.has_rdtscp()
{
val |= CpuCtrl2::ENABLE_RDTSCP;
}
if let Some(features) = raw_cpuid.get_extended_feature_info()
&& features.has_invpcid()
{
val |= CpuCtrl2::ENABLE_INVPCID;
}
if let Some(features) = raw_cpuid.get_extended_state_info()
&& features.has_xsaves_xrstors()
{
val |= CpuCtrl2::ENABLE_XSAVES_XRSTORS;
}
vmcs::set_control(
VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS,
Msr::IA32_VMX_PROCBASED_CTLS2,
Msr::IA32_VMX_PROCBASED_CTLS2.read() as u32,
val.bits(),
0,
)?;
use ExitControls as ExitCtrl;
// VM-exit controls: 64-bit host, acknowledge interrupts on exit, save/load PAT and EFER.
vmcs::set_control(
VmcsControl32::VMEXIT_CONTROLS,
Msr::IA32_VMX_TRUE_EXIT_CTLS,
Msr::IA32_VMX_EXIT_CTLS.read() as u32,
(ExitCtrl::HOST_ADDRESS_SPACE_SIZE
| ExitCtrl::ACK_INTERRUPT_ON_EXIT
| ExitCtrl::SAVE_IA32_PAT
| ExitCtrl::LOAD_IA32_PAT
| ExitCtrl::SAVE_IA32_EFER
| ExitCtrl::LOAD_IA32_EFER)
.bits(),
0,
)?;
// VM-entry controls; `EntryCtrl` is the alias declared by the `use` item just below
// (items are visible throughout the enclosing block).
let mut val = EntryCtrl::LOAD_IA32_PAT | EntryCtrl::LOAD_IA32_EFER;
if !is_guest {
val |= EntryCtrl::IA32E_MODE_GUEST;
}
use EntryControls as EntryCtrl;
vmcs::set_control(
VmcsControl32::VMENTRY_CONTROLS,
Msr::IA32_VMX_TRUE_ENTRY_CTLS,
Msr::IA32_VMX_ENTRY_CTLS.read() as u32,
val.bits(),
0,
)?;
vmcs::set_ept_pointer(ept_root)?;
// No MSR load/store lists and no CR3 targets are used.
VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?;
VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?;
VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?;
VmcsControl32::CR3_TARGET_COUNT.write(0)?;
// Only exception vector 6 (#UD) causes a VM exit.
let exception_bitmap: u32 = 1 << 6;
self.setup_io_bitmap(config)?;
VmcsControl32::EXCEPTION_BITMAP.write(exception_bitmap)?;
VmcsControl64::IO_BITMAP_A_ADDR.write(self.io_bitmap.phys_addr().0.as_usize() as _)?;
VmcsControl64::IO_BITMAP_B_ADDR.write(self.io_bitmap.phys_addr().1.as_usize() as _)?;
VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr().as_usize() as _)?;
VmcsControl64::VIRT_APIC_ADDR.write(self.vlapic.virtual_apic_page_addr().as_usize() as _)?;
VmcsControl64::APIC_ACCESS_ADDR
.write(EmulatedLocalApic::virtual_apic_access_addr().as_usize() as _)?;
// All four EOI-exit bitmaps are fully set.
VmcsControl64::EOI_EXIT0.write(u64::MAX)?;
VmcsControl64::EOI_EXIT1.write(u64::MAX)?;
VmcsControl64::EOI_EXIT2.write(u64::MAX)?;
VmcsControl64::EOI_EXIT3.write(u64::MAX)?;
Ok(())
}
/// Returns the number of guest page-table levels: `0` when paging is off,
/// `2` without PAE, `3` with PAE outside long mode, `4` in long mode.
///
/// # Panics
/// Panics if a VMCS read fails.
fn get_paging_level(&self) -> usize {
    let cr0 = VmcsGuestNW::CR0.read().unwrap();
    let cr4 = VmcsGuestNW::CR4.read().unwrap();
    let efer = VmcsGuest64::IA32_EFER.read().unwrap();

    let paging = (cr0 & Cr0Flags::PAGING.bits() as usize) != 0;
    let pae = (cr4 & Cr4Flags::PHYSICAL_ADDRESS_EXTENSION.bits() as usize) != 0;
    let long_mode = (efer & EferFlags::LONG_MODE_ACTIVE.bits()) != 0;

    match (paging, pae, long_mode) {
        (false, _, _) => 0,
        (true, false, _) => 2,
        (true, true, false) => 3,
        (true, true, true) => 4,
    }
}
}
impl VmxVcpu {
/// Writes guest control register `cr_idx` (0, 3 or 4).
///
/// For CR0 and CR4 the value is forced to respect the VMX fixed-bit MSRs,
/// the read shadow receives the requested value, and the guest/host mask
/// covers the fixed bits.
///
/// # Panics
/// Panics if a VMCS write fails or if `cr_idx` is not 0, 3 or 4.
fn set_cr(&mut self, cr_idx: usize, val: u64) {
(|| -> AxResult {
match cr_idx {
0 => {
// `must0`: bits allowed to be 1; NW and CD are excluded, so they are always cleared.
let must0 = Msr::IA32_VMX_CR0_FIXED1.read()
& !(Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE).bits();
// `must1`: bits forced to 1; PG and PE are excluded, so the guest may leave them clear.
let must1 = Msr::IA32_VMX_CR0_FIXED0.read()
& !(Cr0Flags::PAGING | Cr0Flags::PROTECTED_MODE_ENABLE).bits();
VmcsGuestNW::CR0.write(((val & must0) | must1) as _)?;
VmcsControlNW::CR0_READ_SHADOW.write(val as _)?;
VmcsControlNW::CR0_GUEST_HOST_MASK.write((must1 | !must0) as _)?;
}
3 => VmcsGuestNW::CR3.write(val as _)?,
4 => {
let must0 = Msr::IA32_VMX_CR4_FIXED1.read();
let must1 = Msr::IA32_VMX_CR4_FIXED0.read();
// VMXE is always added to the value before it is written.
let val = val | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS.bits();
VmcsGuestNW::CR4.write(((val & must0) | must1) as _)?;
VmcsControlNW::CR4_READ_SHADOW.write(val as _)?;
VmcsControlNW::CR4_GUEST_HOST_MASK.write((must1 | !must0) as _)?;
}
_ => unreachable!(),
};
Ok(())
})()
.expect("Failed to write guest control register")
}
/// Reads guest control register `cr_idx` (0, 3 or 4).
///
/// For CR4, bits covered by the guest/host mask come from the read shadow
/// and the remaining bits from the real guest CR4.
///
/// # Panics
/// Panics if a VMCS read fails or if `cr_idx` is not 0, 3 or 4.
#[allow(dead_code)]
fn cr(&self, cr_idx: usize) -> usize {
    let read = || -> AxResult<usize> {
        let value = match cr_idx {
            0 => VmcsGuestNW::CR0.read()?,
            3 => VmcsGuestNW::CR3.read()?,
            4 => {
                let host_mask = VmcsControlNW::CR4_GUEST_HOST_MASK.read()?;
                let shadow_part = VmcsControlNW::CR4_READ_SHADOW.read()? & host_mask;
                let guest_part = VmcsGuestNW::CR4.read()? & !host_mask;
                shadow_part | guest_part
            }
            _ => unreachable!(),
        };
        Ok(value)
    };
    read().expect("Failed to read guest control register")
}
}
/// Reads a `u64` from the host mapping of address `gpa`, without requiring
/// alignment.
// NOTE(review): `gpa` is passed to `host::phys_to_virt` as a host-physical address, i.e. this
// assumes guest-physical and host-physical addresses coincide — confirm.
fn read_guest_phys_u64(gpa: usize) -> u64 {
    let host_paddr = HostPhysAddr::from(gpa);
    let src = host::phys_to_virt(host_paddr).as_ptr() as *const u64;
    unsafe { core::ptr::read_unaligned(src) }
}
// Body shared by the naked VM-entry functions; `$instr` is the entry instruction.
//
// `rdi` is used as the base of the `VmxVcpu` (assumes `&mut self` arrives in `rdi`, as in the
// System V `extern "C"` convention — TODO confirm for the target ABI). Sequence:
//   1. save host RFLAGS into `host_rflags`;
//   2. `save_regs_to_stack!()` (presumably pushes the host GPRs — defined elsewhere);
//   3. store the host RSP into `host_stack_top`;
//   4. point RSP at `self` so `restore_regs_from_stack!()` reads from `guest_regs`;
//   5. execute `$instr`; reaching the `jmp` means the VM entry failed.
macro_rules! vmx_entry_with {
($instr:literal) => {
naked_asm!(
"pushfq", "pop qword ptr [rdi + {host_rflags}]",
save_regs_to_stack!(), "mov [rdi + {host_stack_size}], rsp", "mov rsp, rdi", restore_regs_from_stack!(), $instr, "jmp {failed}",
// Offset of `host_stack_top`: it directly follows `guest_regs`.
host_stack_size = const size_of::<GeneralRegisters>(),
// Offset of `host_rflags`: it directly follows `host_stack_top`.
host_rflags = const size_of::<GeneralRegisters>() + size_of::<u64>(),
failed = sym Self::vmx_entry_failed,
)
}
}
impl VmxVcpu {
/// First VM entry of this vCPU, using `vmlaunch`.
///
/// # Safety
/// Must only be called from `inner_run`, with this vCPU's VMCS current and
/// fully set up.
#[unsafe(naked)]
unsafe extern "C" fn vmx_launch(&mut self) -> usize {
vmx_entry_with!("vmlaunch")
}
/// Subsequent VM entries of this vCPU, using `vmresume`.
///
/// # Safety
/// Same requirements as `vmx_launch`; the vCPU must already have been launched.
#[unsafe(naked)]
unsafe extern "C" fn vmx_resume(&mut self) -> usize {
vmx_entry_with!("vmresume")
}
/// VM-exit landing stub; its address is installed as the host RIP by
/// `setup_vmcs_host`.
///
/// On entry RSP is the host RSP programmed in `inner_run`, i.e. the address
/// of `host_stack_top` right past `guest_regs`. After `cli` and
/// `save_regs_to_stack!()` (presumably storing the guest GPRs into
/// `guest_regs` — defined elsewhere), the saved host stack pointer is
/// reloaded, host registers are restored and `ret` returns to the caller of
/// `vmx_launch`/`vmx_resume`.
///
/// # Safety
/// Never call directly; it is only reached through a VM exit.
#[unsafe(naked)]
unsafe extern "C" fn vmx_exit(&mut self) -> usize {
naked_asm!(
"cli", save_regs_to_stack!(), "mov rsp, [rsp + {host_stack_top}]", restore_regs_from_stack!(), "ret",
host_stack_top = const size_of::<GeneralRegisters>(),
);
}
/// Jump target used when `vmlaunch`/`vmresume` falls through; panics with
/// the VM-instruction error reported by the VMCS.
fn vmx_entry_failed() -> ! {
panic!("{}", vmcs::instruction_error().as_str())
}
/// Returns `true` when the guest can take an interrupt right now: the
/// guest RFLAGS.IF is set and the interruptibility state is zero.
///
/// # Panics
/// Panics if a VMCS read fails.
fn allow_interrupt(&self) -> bool {
    use x86_64::registers::rflags::RFlags;

    let guest_rflags = VmcsGuestNW::RFLAGS.read().unwrap() as u64;
    let interruptibility = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap();

    let interrupts_enabled = (guest_rflags & RFlags::INTERRUPT_FLAG.bits()) != 0;
    let not_blocked = interruptibility == 0;
    interrupts_enabled && not_blocked
}
fn inject_pending_events(&mut self) -> AxResult {
if let Some(event) = self.pending_events.front() {
if event.vector < 32 || self.allow_interrupt() {
vmcs::inject_event(event.vector, event.err_code)?;
if event.vector >= 32 {
self.vlapic
.accept_interrupt(event.vector, event.level_triggered);
}
self.pending_events.pop_front();
} else {
self.set_interrupt_window(true)?;
}
}
Ok(())
}
/// Handles an interrupt-window exit: turns interrupt-window exiting off
/// again and retries injecting the pending event.
fn handle_interrupt_window(&mut self) -> AxResult {
self.set_interrupt_window(false)?;
self.inject_pending_events()
}
/// Dispatches the VM exits this vCPU handles on its own.
///
/// Returns `Some(result)` when the exit was handled here and `None` when
/// the caller must handle it. MSR exits are only handled for the APIC base
/// MSR, the x2APIC MSR range and `AMD64_DE_CFG`; the MSR index comes from
/// the low 32 bits of guest RCX.
fn builtin_vmexit_handler(&mut self, exit_info: &VmxExitInfo) -> Option<AxResult> {
    const APIC_BASE_MSR: u32 = 0x1b;
    const X2APIC_MSR_BASE: u32 = 0x800;
    const X2APIC_MSR_END: u32 = 0x8ff;
    const AMD64_DE_CFG: u32 = 0xc001_1029;

    match exit_info.exit_reason {
        VmxExitReason::INTERRUPT_WINDOW => Some(self.handle_interrupt_window()),
        VmxExitReason::XSETBV => Some(self.handle_xsetbv()),
        VmxExitReason::CR_ACCESS => Some(self.handle_cr()),
        VmxExitReason::CPUID => Some(self.handle_cpuid()),
        reason @ (VmxExitReason::MSR_READ | VmxExitReason::MSR_WRITE) => {
            let is_write = reason == VmxExitReason::MSR_WRITE;
            let msr = self.regs().rcx as u32;
            match msr {
                APIC_BASE_MSR => Some(self.handle_apic_base_msr_access(is_write)),
                X2APIC_MSR_BASE..=X2APIC_MSR_END => {
                    Some(self.handle_apic_msr_access(is_write, msr))
                }
                AMD64_DE_CFG => Some(self.handle_amd64_de_cfg_msr_access(is_write)),
                _ => None,
            }
        }
        VmxExitReason::APIC_ACCESS => Some(self.handle_apic_access(exit_info)),
        _ => None,
    }
}
/// Combines the low 32 bits of guest RDX (high half) and RAX (low half)
/// into one 64-bit value.
fn read_edx_eax(&self) -> u64 {
    let regs = self.regs();
    let high = regs.rdx & 0xffff_ffff;
    let low = regs.rax & 0xffff_ffff;
    (high << 32) | low
}
/// Splits `val` into guest RAX (low 32 bits) and RDX (high 32 bits).
fn write_edx_eax(&mut self, val: u64) {
    let regs = self.regs_mut();
    regs.rax = val & 0xffff_ffff;
    regs.rdx = val >> 32;
}
/// Emulates `rdmsr`/`wrmsr` on the APIC base MSR using the virtual local
/// APIC. The guest RIP is advanced before the access is emulated.
fn handle_apic_base_msr_access(&mut self, write: bool) -> AxResult {
    const VMEXIT_INSTR_LEN_RDMSR_WRMSR: u8 = 2;
    self.advance_rip(VMEXIT_INSTR_LEN_RDMSR_WRMSR)?;

    if !write {
        let value = self.vlapic.apic_base();
        trace!("handle_vlapic_apic_base_read: value={value:#x}");
        self.write_edx_eax(value);
        return Ok(());
    }

    let value = self.read_edx_eax();
    trace!("handle_vlapic_apic_base_write: value={value:#x}");
    self.vlapic.set_apic_base(value)
}
/// Emulates `rdmsr`/`wrmsr` on an x2APIC MSR by forwarding a quad-word
/// system-register access to the virtual local APIC. The guest RIP is
/// advanced before the access is emulated.
fn handle_apic_msr_access(&mut self, write: bool, msr: u32) -> AxResult {
    const VMEXIT_INSTR_LEN_RDMSR_WRMSR: u8 = 2;
    self.advance_rip(VMEXIT_INSTR_LEN_RDMSR_WRMSR)?;
    let msr = msr as _;

    if !write {
        let value = <EmulatedLocalApic as BaseDeviceOps<SysRegAddrRange>>::handle_read(
            &self.vlapic,
            SysRegAddr::new(msr),
            AccessWidth::Qword,
        )? as u64;
        trace!("handle_vlapic_msr_read: msr={msr:#x}, value={value:#x}");
        self.write_edx_eax(value);
        return Ok(());
    }

    let value = self.read_edx_eax() as usize;
    trace!("handle_vlapic_msr_write: msr={msr:#x}, value={value:#x}");
    <EmulatedLocalApic as BaseDeviceOps<SysRegAddrRange>>::handle_write(
        &self.vlapic,
        SysRegAddr::new(msr),
        AccessWidth::Qword,
        value,
    )
}
/// Emulates accesses to the `AMD64_DE_CFG` MSR: reads return zero and
/// writes are ignored. The guest RIP is advanced in both cases.
fn handle_amd64_de_cfg_msr_access(&mut self, write: bool) -> AxResult {
    const VMEXIT_INSTR_LEN_RDMSR_WRMSR: u8 = 2;
    self.advance_rip(VMEXIT_INSTR_LEN_RDMSR_WRMSR)?;
    if write {
        return Ok(());
    }
    self.write_edx_eax(0);
    Ok(())
}
/// Emulates a linear data read or write that hit the APIC-access page by
/// forwarding a 32-bit access to the virtual local APIC, then advances the
/// guest RIP by the exit's instruction length.
///
/// Read results are always stored in guest RAX.
///
/// # Errors
/// Returns `BadState` for any other APIC access type, and propagates
/// decoding or device errors.
fn handle_apic_access(&mut self, exit_info: &VmxExitInfo) -> AxResult {
    let access = self.apic_access_exit_info()?;
    let is_write = match access.access_type {
        ApicAccessExitType::LinearDataRead => false,
        ApicAccessExitType::LinearDataWrite => true,
        _ => {
            warn!(
                "Unsupported APIC access type: {:?}",
                access.access_type
            );
            return ax_err!(BadState, "Unsupported APIC access type");
        }
    };

    let offset = access.offset as usize;
    let addr = GuestPhysAddr::from(X86_APIC_ACCESS_GPA + offset);

    if is_write {
        // The written value has to be recovered by decoding the guest instruction.
        let value = self.decode_apic_mmio_write_value(exit_info)?;
        <EmulatedLocalApic as BaseDeviceOps<AddrRange<GuestPhysAddr>>>::handle_write(
            &self.vlapic,
            addr,
            AccessWidth::Dword,
            value,
        )?;
    } else {
        let value = <EmulatedLocalApic as BaseDeviceOps<AddrRange<GuestPhysAddr>>>::handle_read(
            &self.vlapic,
            addr,
            AccessWidth::Dword,
        )?;
        self.regs_mut().rax = value as u64;
    }

    self.advance_rip(exit_info.exit_instruction_length as _)
}
/// Decodes the guest instruction at the exit RIP and returns the 32-bit
/// value it stores to memory.
///
/// Supported forms: opcode `0x89` (store from a register) and opcode `0xc7`
/// with ModRM.reg == 0 (store of a 32-bit immediate).
///
/// # Errors
/// `Unsupported` when the destination is a register or the opcode is not
/// one of the forms above; guest memory read errors are propagated.
fn decode_apic_mmio_write_value(&self, exit_info: &VmxExitInfo) -> AxResult<usize> {
    let mut cursor = self.gla2gva(GuestVirtAddr::from(exit_info.guest_rip));
    let mut rex = 0u8;
    self.skip_simple_prefixes(&mut cursor, &mut rex)?;

    let opcode = self.read_guest_u8(cursor)?;
    cursor += 1;
    let modrm = self.read_guest_u8(cursor)?;
    cursor += 1;

    if modrm >> 6 == 0b11 {
        return ax_err!(Unsupported, "APIC MMIO write destination is not memory");
    }

    let reg_field = (modrm >> 3) & 0x7;
    match opcode {
        0x89 => {
            // REX.R supplies the fourth bit of the register index.
            let reg = reg_field | ((rex & 0x4) << 1);
            Ok(self.guest_regs.get_reg_of_index(reg) as u32 as usize)
        }
        0xc7 if reg_field == 0 => {
            // The immediate follows the memory operand; assemble it little-endian.
            let imm_addr = self.skip_modrm_memory_operand(cursor, modrm, rex)?;
            let value = (0..size_of::<u32>()).try_fold(0u32, |acc, i| -> AxResult<u32> {
                let byte = self.read_guest_u8(imm_addr + i)? as u32;
                Ok(acc | (byte << (i * 8)))
            })?;
            Ok(value as usize)
        }
        _ => ax_err!(
            Unsupported,
            format_args!("unsupported APIC MMIO write opcode {opcode:#x}")
        ),
    }
}
/// Decodes a 32-bit MMIO access to the I/O APIC window into an exit reason
/// for the caller.
///
/// Returns `None` when `addr` lies outside the window, when the
/// instruction bytes cannot be read, or when the instruction is not one of
/// the supported forms (`0x89` / `0xc7 /0` for writes, `0x8b` for reads).
fn decode_ept_mmio_access(
    &self,
    exit_info: &VmxExitInfo,
    addr: GuestPhysAddr,
    write: bool,
) -> Option<AxVCpuExitReason> {
    let ioapic_window = X86_IOAPIC_BASE..X86_IOAPIC_BASE + X86_IOAPIC_SIZE;
    if !ioapic_window.contains(&addr.as_usize()) {
        return None;
    }

    let mut cursor = self.gla2gva(GuestVirtAddr::from(exit_info.guest_rip));
    let mut rex = 0u8;
    if let Err(err) = self.skip_simple_prefixes(&mut cursor, &mut rex) {
        debug!("failed to decode EPT MMIO prefixes: {err:?}");
        return None;
    }

    let opcode = self.read_guest_u8(cursor).ok()?;
    cursor += 1;
    let modrm = self.read_guest_u8(cursor).ok()?;
    cursor += 1;
    if modrm >> 6 == 0b11 {
        debug!("EPT MMIO access did not use a memory operand");
        return None;
    }

    let reg_field = (modrm >> 3) & 0x7;
    // REX.R supplies the fourth bit of the register index.
    let reg_index = reg_field | ((rex & 0x4) << 1);

    match (write, opcode) {
        (true, 0x89) => Some(AxVCpuExitReason::MmioWrite {
            addr,
            width: AccessWidth::Dword,
            data: self.guest_regs.get_reg_of_index(reg_index) as u32 as u64,
        }),
        (true, 0xc7) if reg_field == 0 => {
            let imm_addr = self.skip_modrm_memory_operand(cursor, modrm, rex).ok()?;
            let mut imm = 0u32;
            for i in 0..size_of::<u32>() {
                let byte = self.read_guest_u8(imm_addr + i).ok()? as u32;
                imm |= byte << (i * 8);
            }
            Some(AxVCpuExitReason::MmioWrite {
                addr,
                width: AccessWidth::Dword,
                data: imm as u64,
            })
        }
        (false, 0x8b) => Some(AxVCpuExitReason::MmioRead {
            addr,
            width: AccessWidth::Dword,
            reg: reg_index as usize,
            reg_width: AccessWidth::Dword,
            signed_ext: false,
        }),
        _ => {
            debug!("unsupported EPT MMIO opcode {opcode:#x}, write={write}");
            None
        }
    }
}
/// Advances `rip` past any `0x66` and `0x40..=0x4f` (REX) prefix bytes,
/// recording the last REX byte seen in `rex`.
///
/// # Errors
/// Propagates guest memory read errors.
fn skip_simple_prefixes(&self, rip: &mut GuestVirtAddr, rex: &mut u8) -> AxResult {
    loop {
        match self.read_guest_u8(*rip)? {
            0x66 => {}
            prefix @ 0x40..=0x4f => *rex = prefix,
            _ => return Ok(()),
        }
        *rip += 1;
    }
}
/// Returns the address of the first byte after a ModRM memory operand.
///
/// `cursor` must point right after the ModRM byte. The optional SIB byte
/// and any displacement are skipped.
///
/// `rex` is accepted for interface compatibility but not consulted: REX.B
/// is not decoded for the `r/m = 100` (SIB present) and `mod = 00,
/// r/m = 101` (disp32, no base) encodings, so the displacement size does
/// not depend on it. The previous implementation skipped the disp32 of
/// `mod = 00, r/m = 101` only when REX.B was clear, which mis-located the
/// immediate for instructions carrying REX.B.
///
/// # Errors
/// `InvalidInput` when `modrm` encodes a register operand (`mod = 11`);
/// guest memory read errors are propagated.
fn skip_modrm_memory_operand(
    &self,
    mut cursor: GuestVirtAddr,
    modrm: u8,
    rex: u8,
) -> AxResult<GuestVirtAddr> {
    // Intentionally unused, see the function documentation.
    let _ = rex;

    let mode = modrm >> 6;
    let rm = modrm & 0x7;
    if mode == 0b11 {
        return ax_err!(InvalidInput, "ModRM register operand is not memory");
    }

    if rm == 0b100 {
        // A SIB byte follows; base == 101 with mod == 00 means "disp32, no base".
        let sib = self.read_guest_u8(cursor)?;
        cursor += 1;
        if mode == 0 && (sib & 0x7) == 0b101 {
            cursor += size_of::<u32>();
        }
    } else if mode == 0 && rm == 0b101 {
        // disp32 without base register (RIP-relative in 64-bit mode).
        cursor += size_of::<u32>();
    }

    // Displacement selected by the mod field itself.
    cursor += match mode {
        0 => 0,
        1 => size_of::<u8>(),
        _ => size_of::<u32>(),
    };
    Ok(cursor)
}
/// Reads one byte of guest memory at guest-virtual address `gva`.
///
/// # Errors
/// Propagates address translation failures.
fn read_guest_u8(&self, gva: GuestVirtAddr) -> AxResult<u8> {
    let gpa = self.translate_guest_linear(gva)?;
    let host_paddr = HostPhysAddr::from(gpa.as_usize());
    let host_vaddr = host::phys_to_virt(host_paddr);
    let byte: u8 = unsafe { core::ptr::read_volatile(host_vaddr.as_ptr()) };
    Ok(byte)
}
/// Translates a guest linear address to a guest-physical address.
///
/// With paging disabled the address is returned unchanged; with 4-level
/// paging the guest page tables are walked.
///
/// # Errors
/// `Unsupported` for 2- and 3-level paging; page-walk errors are propagated.
fn translate_guest_linear(&self, gva: GuestVirtAddr) -> AxResult<GuestPhysAddr> {
    let linear = gva.as_usize();
    let level = self.get_paging_level();
    if level == 0 {
        return Ok(GuestPhysAddr::from(linear));
    }
    if level == 4 {
        return self.walk_guest_page_table_4level(linear);
    }
    ax_err!(
        Unsupported,
        format_args!("unsupported APIC MMIO write decode paging level {level}")
    )
}
/// Walks the guest's 4-level page tables, starting at the guest CR3, to
/// translate `gva`.
///
/// 1 GiB and 2 MiB mappings (huge-page bit set at walk level 1 or 2) and
/// 4 KiB mappings are supported.
///
/// # Errors
/// `InvalidInput` when an entry on the path is not present.
fn walk_guest_page_table_4level(&self, gva: usize) -> AxResult<GuestPhysAddr> {
    const PRESENT: u64 = 1 << 0;
    const HUGE_PAGE: u64 = 1 << 7;
    const ADDR_MASK: u64 = 0x000f_ffff_ffff_f000;
    // Bit position of the table index for each walk level (0 = top level).
    const INDEX_SHIFTS: [usize; 4] = [39, 30, 21, 12];

    let mut table = VmcsGuestNW::CR3.read()? & ADDR_MASK as usize;
    for (level, shift) in INDEX_SHIFTS.into_iter().enumerate() {
        let index = (gva >> shift) & 0x1ff;
        let entry = read_guest_phys_u64(table + index * size_of::<u64>());
        if entry & PRESENT == 0 {
            return ax_err!(
                InvalidInput,
                format_args!("guest RIP page table entry is not present at level {level}")
            );
        }

        let paddr = (entry & ADDR_MASK) as usize;
        let is_huge = entry & HUGE_PAGE != 0;
        let maps_page = level == 3 || (is_huge && (level == 1 || level == 2));
        if maps_page {
            // The page offset covers every address bit below this level's index.
            let offset_mask = (1usize << shift) - 1;
            return Ok(GuestPhysAddr::from(paddr + (gva & offset_mask)));
        }
        table = paddr;
    }
    ax_err!(InvalidInput, "failed to translate guest RIP")
}
/// Re-arms the VMX-preemption timer with `VMX_PREEMPTION_TIMER_SET_VALUE`.
// NOTE(review): not called from any code visible in this part of the file.
fn handle_vmx_preemption_timer(&mut self) -> AxResult {
VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(VMX_PREEMPTION_TIMER_SET_VALUE)?;
Ok(())
}
/// Handles a control-register access exit.
///
/// Only `MOV to CR0` and `MOV to CR4` (access type 0) are emulated: the
/// source register value is applied through `set_cr`, and the guest EFER is
/// refreshed when the new CR0 value has paging enabled.
///
/// The guest RIP is advanced by the instruction length reported for the
/// exit rather than by a fixed 3 bytes, because `mov crN, r8..r15` carries
/// a REX prefix and is 4 bytes long.
///
/// # Panics
/// Panics on any other control-register access, and if `set_cr` fails.
fn handle_cr(&mut self) -> AxResult {
    let cr_access_info = vmcs::cr_access_info()?;
    let reg = cr_access_info.gpr;
    let cr = cr_access_info.cr_number;

    // Access type 0 is "MOV to CR".
    if cr_access_info.access_type == 0 && (cr == 0 || cr == 4) {
        // Register index 4 is RSP, which lives in the VMCS rather than in `guest_regs`.
        let val = if reg == 4 {
            self.stack_pointer() as u64
        } else {
            self.guest_regs.get_reg_of_index(reg)
        };
        let instr_len = self.exit_info()?.exit_instruction_length;
        self.advance_rip(instr_len as _)?;
        self.set_cr(cr as usize, val);
        if cr == 0 && Cr0Flags::from_bits_truncate(val).contains(Cr0Flags::PAGING) {
            vmcs::update_efer()?;
        }
        return Ok(());
    }

    panic!(
        "Guest's access to cr not allowed: {:#x?}, {:#x?}",
        self, cr_access_info
    );
}
/// Emulates `cpuid` for the guest.
///
/// Most leaves are answered with the host's values. The following leaves
/// are adjusted or synthesised:
/// - `0x1`: hides VMX, TSC-deadline and MCE; advertises x2APIC, APIC and
///   the hypervisor bit; reports one logical processor with APIC ID 0.
/// - `0xb` / `0x1f`: returns zeros, echoing the sub-leaf in ECX.
/// - `0x7` sub-leaf 0: clears ECX bits 5 (WAITPKG) and 16 (LA57).
/// - `0xd`: queried with the guest's extended state loaded.
/// - `0x4000_0000` / `0x4000_0001`: hypervisor vendor and (empty) features.
/// - `0x16`: substitutes a TSC frequency when the host reports none.
///
/// The results are written to guest RAX/RBX/RCX/RDX and the guest RIP is
/// advanced past the instruction.
fn handle_cpuid(&mut self) -> AxResult {
    use raw_cpuid::{CpuIdResult, cpuid};
    const VM_EXIT_INSTR_LEN_CPUID: u8 = 2;
    const LEAF_FEATURE_INFO: u32 = 0x1;
    const LEAF_STRUCTURED_EXTENDED_FEATURE_FLAGS_ENUMERATION: u32 = 0x7;
    const LEAF_PROCESSOR_EXTENDED_STATE_ENUMERATION: u32 = 0xd;
    const EAX_FREQUENCY_INFO: u32 = 0x16;
    const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000;
    const LEAF_HYPERVISOR_FEATURE: u32 = 0x4000_0001;
    const VENDOR_STR: &[u8; 12] = b"RVMRVMRVMRVM";

    // Split the vendor string into three little-endian dwords. Reinterpreting the byte
    // array as `[u32; 3]` would create a reference that may be under-aligned.
    let vendor_regs: [u32; 3] = core::array::from_fn(|i| {
        let base = i * size_of::<u32>();
        u32::from_le_bytes([
            VENDOR_STR[base],
            VENDOR_STR[base + 1],
            VENDOR_STR[base + 2],
            VENDOR_STR[base + 3],
        ])
    });

    let regs_clone = *self.regs();
    let function = regs_clone.rax as u32;
    let res = match function {
        LEAF_FEATURE_INFO => {
            const FEATURE_VMX: u32 = 1 << 5;
            const FEATURE_HYPERVISOR: u32 = 1 << 31;
            const FEATURE_MCE: u32 = 1 << 7;
            const FEATURE_X2APIC: u32 = 1 << 21;
            const FEATURE_TSC_DEADLINE: u32 = 1 << 24;
            const FEATURE_APIC: u32 = 1 << 9;
            const MAX_LOGICAL_PROCESSORS_MASK: u32 = 0xff << 16;
            const INITIAL_APIC_ID_MASK: u32 = 0xff << 24;
            let mut res = cpuid!(regs_clone.rax, regs_clone.rcx);
            res.ecx &= !FEATURE_VMX;
            res.ecx |= FEATURE_X2APIC;
            res.ecx &= !FEATURE_TSC_DEADLINE;
            res.ecx |= FEATURE_HYPERVISOR;
            res.edx &= !FEATURE_MCE;
            res.edx |= FEATURE_APIC;
            // One logical processor, initial APIC ID 0.
            res.ebx &= !(MAX_LOGICAL_PROCESSORS_MASK | INITIAL_APIC_ID_MASK);
            res.ebx |= 1 << 16;
            res
        }
        0xb | 0x1f => CpuIdResult {
            eax: 0,
            ebx: 0,
            ecx: regs_clone.rcx as u32,
            edx: 0,
        },
        LEAF_STRUCTURED_EXTENDED_FEATURE_FLAGS_ENUMERATION => {
            let mut res = cpuid!(regs_clone.rax, regs_clone.rcx);
            if regs_clone.rcx == 0 {
                res.ecx.set_bit(5, false);
                res.ecx.set_bit(16, false);
            }
            res
        }
        LEAF_PROCESSOR_EXTENDED_STATE_ENUMERATION => {
            // Run the query while the guest's extended state is loaded.
            self.load_guest_xstate();
            let res = cpuid!(regs_clone.rax, regs_clone.rcx);
            self.load_host_xstate();
            res
        }
        LEAF_HYPERVISOR_INFO => CpuIdResult {
            eax: LEAF_HYPERVISOR_FEATURE,
            ebx: vendor_regs[0],
            ecx: vendor_regs[1],
            edx: vendor_regs[2],
        },
        LEAF_HYPERVISOR_FEATURE => CpuIdResult {
            eax: 0,
            ebx: 0,
            ecx: 0,
            edx: 0,
        },
        EAX_FREQUENCY_INFO => {
            const FALLBACK_TSC_FREQUENCY_MHZ: u32 = 3_000;
            let mut res = cpuid!(regs_clone.rax, regs_clone.rcx);
            if res.eax == 0 {
                let frequency_mhz =
                    crate::host_tsc_frequency_mhz().unwrap_or(FALLBACK_TSC_FREQUENCY_MHZ);
                warn!(
                    "handle_cpuid: Failed to get TSC frequency by CPUID, default to \
                     {frequency_mhz} MHz"
                );
                res.eax = frequency_mhz;
            }
            res
        }
        _ => cpuid!(regs_clone.rax, regs_clone.rcx),
    };
    trace!(
        "VM exit: CPUID({:#x}, {:#x}): {:?}",
        regs_clone.rax, regs_clone.rcx, res
    );
    let regs = self.regs_mut();
    regs.rax = res.eax as _;
    regs.rbx = res.ebx as _;
    regs.rcx = res.ecx as _;
    regs.rdx = res.edx as _;
    self.advance_rip(VM_EXIT_INSTR_LEN_CPUID)?;
    Ok(())
}
fn handle_xsetbv(&mut self) -> AxResult {
const XCR_XCR0: u64 = 0;
const VM_EXIT_INSTR_LEN_XSETBV: u8 = 3;
let index = self.guest_regs.rcx.get_bits(0..32);
let value = self.guest_regs.rdx.get_bits(0..32) << 32 | self.guest_regs.rax.get_bits(0..32);
if index == XCR_XCR0 {
Xcr0::from_bits(value)
.and_then(|x| {
if !x.contains(Xcr0::XCR0_FPU_MMX_STATE) {
return None;
}
if x.contains(Xcr0::XCR0_AVX_STATE) && !x.contains(Xcr0::XCR0_SSE_STATE) {
return None;
}
if x.contains(Xcr0::XCR0_BNDCSR_STATE) ^ x.contains(Xcr0::XCR0_BNDREG_STATE) {
return None;
}
let avx512_state = x.contains(Xcr0::XCR0_OPMASK_STATE)
|| x.contains(Xcr0::XCR0_ZMM_HI256_STATE)
|| x.contains(Xcr0::XCR0_HI16_ZMM_STATE);
let avx512_state_complete = x.contains(Xcr0::XCR0_OPMASK_STATE)
&& x.contains(Xcr0::XCR0_ZMM_HI256_STATE)
&& x.contains(Xcr0::XCR0_HI16_ZMM_STATE);
if avx512_state
&& (!avx512_state_complete
|| !x.contains(Xcr0::XCR0_AVX_STATE)
|| !x.contains(Xcr0::XCR0_SSE_STATE))
{
return None;
}
Some(x)
})
.ok_or_else(|| ax_err_type!(InvalidInput))
.and_then(|x| {
self.xstate.guest_xcr0 = x.bits();
self.advance_rip(VM_EXIT_INSTR_LEN_XSETBV)
})
} else {
ax_err!(Unsupported, "only xcr0 is supported")
}
}
/// Switches the extended-state context to the guest's by delegating to
/// `self.xstate.switch_to_guest()`.
// NOTE(review): what exactly is swapped (XCR0, IA32_XSS, ...) is decided by
// `XState`, which is defined outside this chunk.
fn load_guest_xstate(&mut self) {
self.xstate.switch_to_guest();
}
/// Switches the extended-state context back to the host's by delegating to
/// `self.xstate.switch_to_host()`.
// NOTE(review): what exactly is swapped (XCR0, IA32_XSS, ...) is decided by
// `XState`, which is defined outside this chunk.
fn load_host_xstate(&mut self) {
self.xstate.switch_to_host();
}
}
impl Drop for VmxVcpu {
fn drop(&mut self) {
unsafe { vmx::vmclear(self.vmcs.phys_addr().as_usize() as u64).unwrap() };
info!("[HV] dropped VmxVcpu(vmcs: {:#x})", self.vmcs.phys_addr());
}
}
/// Extracts the base address of the task-state segment selected by `tr`
/// from the descriptor table described by `gdt`.
///
/// The descriptor is read as two consecutive 8-byte table slots. When the
/// present bit (bit 47) of the first slot is clear the function returns 0
/// without touching the second slot.
///
/// # Panics
///
/// Panics if the selector index (or, for a present descriptor, the index
/// plus one) lies beyond the table limit.
fn get_tr_base(tr: SegmentSelector, gdt: &DescriptorTablePointer<u64>) -> u64 {
    const PRESENT: u64 = 1 << 47;

    let slot = tr.index() as usize;
    let slot_count = (gdt.limit as usize + 1) / core::mem::size_of::<u64>();
    // SAFETY: relies on `gdt` describing a live descriptor table of at least
    // `limit + 1` bytes. NOTE(review): callers must guarantee this; it cannot
    // be checked here.
    let slots = unsafe { core::slice::from_raw_parts(gdt.base, slot_count) };

    let low = slots[slot];
    if low & PRESENT == 0 {
        return 0;
    }
    let high = slots[slot + 1];

    // Base bits 0..24 live in descriptor bits 16..40, base bits 24..32 in
    // descriptor bits 56..64, and base bits 32..64 in the low half of the
    // following slot.
    let base_0_23 = (low >> 16) & 0x00ff_ffff;
    let base_24_31 = (low >> 56) & 0xff;
    let base_32_63 = high & 0xffff_ffff;
    base_0_23 | (base_24_31 << 24) | (base_32_63 << 32)
}
impl Debug for VmxVcpu {
    /// Formats the saved general-purpose registers together with a selection
    /// of guest-state VMCS fields.
    ///
    /// A VMCS field that cannot be read is rendered as the read error rather
    /// than panicking, so logging a vCPU can never bring the hypervisor down.
    // NOTE(review): the VMCS reads take no reference to `self.vmcs`, so they
    // presumably observe whichever VMCS is current on the executing CPU —
    // confirm that callers only format a vCPU that is bound.
    fn fmt(&self, f: &mut Formatter) -> Result {
        /// Adds `name` to `out`, showing the value on success or the error
        /// on failure.
        fn vmcs_field<T: Debug, E: Debug>(
            out: &mut core::fmt::DebugStruct<'_, '_>,
            name: &str,
            value: core::result::Result<T, E>,
        ) {
            match value {
                Ok(v) => out.field(name, &v),
                Err(e) => out.field(name, &e),
            };
        }

        let mut out = f.debug_struct("VmxVcpu");
        out.field("guest_regs", &self.guest_regs);
        vmcs_field(&mut out, "rip", VmcsGuestNW::RIP.read());
        vmcs_field(&mut out, "rsp", VmcsGuestNW::RSP.read());
        vmcs_field(&mut out, "rflags", VmcsGuestNW::RFLAGS.read());
        vmcs_field(&mut out, "cr0", VmcsGuestNW::CR0.read());
        vmcs_field(&mut out, "cr3", VmcsGuestNW::CR3.read());
        vmcs_field(&mut out, "cr4", VmcsGuestNW::CR4.read());
        vmcs_field(&mut out, "cs", VmcsGuest16::CS_SELECTOR.read());
        vmcs_field(&mut out, "fs_base", VmcsGuestNW::FS_BASE.read());
        vmcs_field(&mut out, "gs_base", VmcsGuestNW::GS_BASE.read());
        vmcs_field(&mut out, "tss", VmcsGuest16::TR_SELECTOR.read());
        out.finish()
    }
}
impl AxArchVCpu for VmxVcpu {
type CreateConfig = ();
type SetupConfig = X86VCpuSetupConfig;
/// Creates a vCPU for the given VM and vCPU identifiers.
///
/// `_config` is ignored (`CreateConfig` is `()`); the call is forwarded to
/// the two-argument `Self::new(vm_id, vcpu_id)`.
fn new(vm_id: VMId, vcpu_id: VCpuId, _config: Self::CreateConfig) -> AxResult<Self> {
// NOTE(review): presumably resolves to the inherent constructor defined
// elsewhere in this file rather than recursing into this trait method.
Self::new(vm_id, vcpu_id)
}
/// Records the guest-physical entry point; it is consumed later by `setup`.
///
/// Always returns `Ok(())`.
fn set_entry(&mut self, entry: GuestPhysAddr) -> AxResult {
self.entry = Some(entry);
Ok(())
}
/// Records the host-physical address of the EPT root; it is consumed later
/// by `setup`.
///
/// Always returns `Ok(())`.
fn set_ept_root(&mut self, ept_root: HostPhysAddr) -> AxResult {
self.ept_root = Some(ept_root);
Ok(())
}
fn setup(&mut self, config: Self::SetupConfig) -> AxResult {
self.setup_vmcs(self.entry.unwrap(), self.ept_root.unwrap(), config)
}
/// Runs the guest until the next VM exit and translates that exit into an
/// architecture-independent [`AxVCpuExitReason`].
///
/// * `inner_run` returning `None` is reported as `Nothing`.
/// * A VM-entry failure is reported as `FailEntry`.
/// * Otherwise the VMX exit reason is mapped case by case below; exit
///   reasons without a dedicated arm are logged and reported as `Halt`.
///
/// # Errors
///
/// Propagates errors from `inner_run`, from reading exit information and
/// from advancing the guest RIP. A failing I/O exit-information read is now
/// propagated as well instead of panicking.
///
/// # Panics
///
/// Panics if an external-interrupt exit carries interruption information
/// that is not marked valid.
fn run(&mut self) -> AxResult<AxVCpuExitReason> {
    let Some(exit_info) = self.inner_run()? else {
        return Ok(AxVCpuExitReason::Nothing);
    };

    if exit_info.entry_failure {
        // NOTE(review): the failure reason is always reported as 0; the
        // actual exit reason in `exit_info` is not forwarded.
        return Ok(AxVCpuExitReason::FailEntry {
            hardware_entry_failure_reason: 0,
        });
    }

    let reason = match exit_info.exit_reason {
        VmxExitReason::VMCALL => {
            // Skip the VMCALL before handing the hypercall to the caller.
            self.advance_rip(exit_info.exit_instruction_length as _)?;
            AxVCpuExitReason::Hypercall {
                nr: self.regs().rax,
                args: [
                    self.regs().rdi,
                    self.regs().rsi,
                    self.regs().rdx,
                    self.regs().rcx,
                    self.regs().r8,
                    self.regs().r9,
                ],
            }
        }
        VmxExitReason::IO_INSTRUCTION => {
            // Propagate a failed read like the other exit-info accessors do.
            let io_info = self.io_exit_info()?;
            // The instruction is skipped before it is classified, so even an
            // unsupported access is not re-executed.
            self.advance_rip(exit_info.exit_instruction_length as _)?;
            let port = io_info.port;
            if io_info.is_repeat || io_info.is_string {
                warn!("VMX unsupported IO-Exit: {io_info:#x?} of {exit_info:#x?}");
                warn!("VCpu {self:#x?}");
                AxVCpuExitReason::Halt
            } else {
                let width = match AccessWidth::try_from(io_info.access_size as usize) {
                    Ok(width) => width,
                    Err(_) => {
                        warn!("VMX invalid IO-Exit: {io_info:#x?} of {exit_info:#x?}");
                        warn!("VCpu {self:#x?}");
                        return Ok(AxVCpuExitReason::Halt);
                    }
                };
                if io_info.is_in {
                    AxVCpuExitReason::IoRead {
                        port: Port(port),
                        width,
                    }
                } else if port == QEMU_EXIT_PORT
                    && width == AccessWidth::Word
                    && self.regs().rax == QEMU_EXIT_MAGIC
                {
                    // A 16-bit write of the magic value to the QEMU exit
                    // port is treated as a shutdown request.
                    AxVCpuExitReason::SystemDown
                } else {
                    AxVCpuExitReason::IoWrite {
                        port: Port(port),
                        width,
                        data: self.regs().rax.get_bits(width.bits_range()),
                    }
                }
            }
        }
        VmxExitReason::EXTERNAL_INTERRUPT => {
            let int_info = self.interrupt_exit_info()?;
            assert!(int_info.valid);
            AxVCpuExitReason::ExternalInterrupt {
                vector: int_info.vector as _,
            }
        }
        VmxExitReason::PREEMPTION_TIMER => {
            self.handle_vmx_preemption_timer()?;
            AxVCpuExitReason::PreemptionTimer
        }
        VmxExitReason::VIRTUALIZED_EOI => AxVCpuExitReason::InterruptEnd {
            vector: self.vlapic.handle_eoi(),
        },
        VmxExitReason::APIC_WRITE => {
            let offset = self.apic_access_exit_info()?.offset as usize;
            // Only a write at offset 0xb0 is treated as an end-of-interrupt;
            // every other APIC write needs no action from the caller.
            if offset == 0xb0 {
                let vector = self.vlapic.handle_eoi();
                AxVCpuExitReason::InterruptEnd { vector }
            } else {
                AxVCpuExitReason::Nothing
            }
        }
        VmxExitReason::EPT_VIOLATION => {
            let info = self.nested_page_fault_info()?;
            let write = info.access_flags.contains(MappingFlags::WRITE);
            let read = info.access_flags.contains(MappingFlags::READ);
            // Data accesses that decode as MMIO are completed by the caller,
            // so the faulting instruction is skipped; everything else is
            // surfaced as a nested page fault with RIP left untouched.
            if (read || write)
                && let Some(mmio_exit) =
                    self.decode_ept_mmio_access(&exit_info, info.fault_guest_paddr, write)
            {
                self.advance_rip(exit_info.exit_instruction_length as _)?;
                mmio_exit
            } else {
                AxVCpuExitReason::NestedPageFault {
                    addr: info.fault_guest_paddr,
                    access_flags: info.access_flags,
                }
            }
        }
        VmxExitReason::MSR_READ => AxVCpuExitReason::SysRegRead {
            addr: SysRegAddr::new(self.regs().rcx as _),
            reg: 0,
        },
        VmxExitReason::MSR_WRITE => {
            // The value to write is EDX:EAX.
            let value =
                (self.regs().rax & 0xffff_ffff) | ((self.regs().rdx & 0xffff_ffff) << 32);
            AxVCpuExitReason::SysRegWrite {
                addr: SysRegAddr::new(self.regs().rcx as _),
                value,
            }
        }
        _ => {
            warn!("VMX unsupported VM-Exit: {exit_info:#x?}");
            warn!("VCpu {self:#x?}");
            AxVCpuExitReason::Halt
        }
    };
    Ok(reason)
}
/// Binds this vCPU to the processor executing the call by delegating to
/// `bind_to_current_processor`; its result is returned unchanged.
fn bind(&mut self) -> AxResult {
self.bind_to_current_processor()
}
/// Detaches this vCPU from the processor executing the call.
///
/// Clears the `launched` flag first and then delegates to
/// `unbind_from_current_processor`, returning its result unchanged.
fn unbind(&mut self) -> AxResult {
// NOTE(review): presumably reset so that the next entry after re-binding
// launches the VMCS afresh — confirm against `inner_run`.
self.launched = false;
self.unbind_from_current_processor()
}
/// Writes `val` into the guest general-purpose register selected by `reg`.
///
/// The work is done by `GeneralRegisters::set_reg_of_index`.
fn set_gpr(&mut self, reg: usize, val: usize) {
// NOTE(review): `reg as u8` silently truncates indices above 255; how an
// out-of-range index is treated is up to `set_reg_of_index` — verify.
self.regs_mut().set_reg_of_index(reg as u8, val as u64);
}
fn inject_interrupt(&mut self, vector: usize) -> AxResult {
if vector != 0 {
} else {
warn!("interrupt queued in inject_interrupt: vector 0");
panic!()
}
self.queue_event(vector as u8, None);
Ok(())
}
fn inject_interrupt_with_trigger(
&mut self,
vector: usize,
trigger: axvcpu::InterruptTriggerMode,
) -> AxResult {
if vector == 0 {
warn!("interrupt queued in inject_interrupt_with_trigger: vector 0");
panic!()
}
self.queue_event_with_trigger(
vector as u8,
None,
trigger == axvcpu::InterruptTriggerMode::LevelTriggered,
);
Ok(())
}
/// Forwards an end-of-interrupt to the emulated local APIC and returns
/// whatever `vlapic.handle_eoi()` reports.
// NOTE(review): the returned `Option<u8>` is presumably the vector whose
// service just ended — confirm against `EmulatedLocalApic::handle_eoi`.
fn handle_eoi(&mut self) -> Option<u8> {
self.vlapic.handle_eoi()
}
/// Stores `val` in the guest's `RAX`, the register this implementation uses
/// to hand a return value back to the guest.
fn set_return_value(&mut self, val: usize) {
self.regs_mut().rax = val as u64;
}
}
#[cfg(test)]
mod tests {
use alloc::format;
use super::*;
/// Neighbouring CPU modes compare unequal and `Debug` names the variant.
#[test]
fn test_vm_cpu_mode_enum() {
    let modes = [
        VmCpuMode::Real,
        VmCpuMode::Protected,
        VmCpuMode::Compatibility,
        VmCpuMode::Mode64,
    ];
    for pair in modes.windows(2) {
        assert_ne!(pair[0], pair[1]);
    }

    let rendered = format!("{:?}", VmCpuMode::Mode64);
    assert!(rendered.contains("Mode64"));
}
/// Register fields start at zero, hold written values, and can be accessed
/// through the index-based accessors.
#[test]
fn test_general_registers_operations() {
    const RAX_VALUE: u64 = 0x1234567890abcdef;
    const RBX_VALUE: u64 = 0xfedcba0987654321;
    const FIRST_INDEXED: u64 = 0x1111111111111111;
    const SECOND_INDEXED: u64 = 0x2222222222222222;

    let mut regs = GeneralRegisters::default();
    assert_eq!(regs.rax, 0);
    assert_eq!(regs.rbx, 0);

    // Direct field access.
    regs.rax = RAX_VALUE;
    regs.rbx = RBX_VALUE;
    assert_eq!(regs.rax, RAX_VALUE);
    assert_eq!(regs.rbx, RBX_VALUE);

    // Index-based access round-trips.
    regs.set_reg_of_index(0, FIRST_INDEXED);
    assert_eq!(regs.get_reg_of_index(0), FIRST_INDEXED);
    regs.set_reg_of_index(1, SECOND_INDEXED);
    assert_eq!(regs.get_reg_of_index(1), SECOND_INDEXED);
}
/// Pins the numeric values of the module-level constants.
#[test]
fn test_constants() {
    assert_eq!(VMX_PREEMPTION_TIMER_SET_VALUE, 100_000);
    assert_eq!((QEMU_EXIT_PORT, QEMU_EXIT_MAGIC), (0x604, 0x2000));
    assert_eq!(MSR_IA32_EFER_LMA_BIT, 0x400);
    assert_eq!(CR0_PE, 0x1);
}
/// The two 32-bit halves of a `u64` can be written and read independently
/// through `BitField`.
#[test]
fn test_bit_operations() {
    use bit_field::BitField;

    let mut word = 0u64;
    word.set_bits(0..32, 0x12345678)
        .set_bits(32..64, 0xabcdef00);

    let low_half = word.get_bits(0..32);
    let high_half = word.get_bits(32..64);
    assert_eq!(low_half, 0x12345678);
    assert_eq!(high_half, 0xabcdef00);
}
mod vmx_vcpu_tests {
use super::*;
/// Builds a register file whose `rax`/`rbx`/`rcx`/`rdx` hold distinct,
/// recognisable values; every other register keeps its default.
fn create_test_vcpu_regs() -> GeneralRegisters {
    let mut regs = GeneralRegisters::default();
    (regs.rax, regs.rbx, regs.rcx, regs.rdx) = (0x1000, 0x2000, 0x3000, 0x4000);
    regs
}
/// A cloned register file carries the same values as its source.
#[test]
fn test_general_registers_clone() {
    let source = create_test_vcpu_regs();
    let duplicate = source.clone();

    let pairs = [
        (source.rax, duplicate.rax),
        (source.rbx, duplicate.rbx),
        (source.rcx, duplicate.rcx),
        (source.rdx, duplicate.rdx),
    ];
    for (expected, actual) in pairs {
        assert_eq!(expected, actual);
    }
}
/// Combining and splitting a 64-bit value across an EDX:EAX register pair.
#[test]
fn test_edx_eax_operations() {
    const LOW_32: u64 = 0xffff_ffff;

    // Merge: EDX supplies the upper half, EAX the lower half.
    let (rax, rdx) = (0x12345678u64, 0xabcdef00u64);
    let merged = (rax & LOW_32) | ((rdx & LOW_32) << 32);
    assert_eq!(merged, 0xabcdef0012345678);

    // Split: the inverse operation.
    let val = 0xfedcba0987654321u64;
    let (new_rax, new_rdx) = (val & LOW_32, val >> 32);
    assert_eq!(new_rax, 0x87654321);
    assert_eq!(new_rdx, 0xfedcba09);
}
/// `BitField` accessors work directly on register fields.
#[test]
fn test_register_bit_operations() {
    let mut regs = GeneralRegisters::default();

    // Writing the low half of a zeroed register.
    regs.rcx = 0;
    let rcx_low = regs.rcx.set_bits(0..32, 0x12345678).get_bits(0..32);
    assert_eq!(rcx_low, 0x12345678);

    // Clearing the high half leaves the low half untouched.
    regs.rdx = 0xffffffffffffffff;
    regs.rdx.set_bits(32..64, 0);
    let rdx_high = regs.rdx.get_bits(32..64);
    let rdx_low = regs.rdx.get_bits(0..32);
    assert_eq!(rdx_high, 0);
    assert_eq!(rdx_low, 0xffffffff);
}
/// Adding a segment base to an instruction pointer: a zero base leaves the
/// address unchanged, a non-zero base offsets it.
#[test]
fn test_gla2gva_logic() {
    let guest_rip = 0x1000usize;
    let cases = [(0usize, 0x1000usize), (0x10000, 0x11000)];
    for (seg_base, expected_gva) in cases {
        assert_eq!(guest_rip + seg_base, expected_gva);
    }
}
/// Sanity checks on the vector ranges: exceptions sit below 32, interrupts
/// at 32 and above, and 0 is the value used as the invalid vector.
#[test]
fn test_interrupt_vector_validation() {
    let (valid_exception, valid_interrupt, invalid_vector) = (6, 0x20, 0);

    assert!(valid_exception < 32);
    assert!(valid_interrupt >= 32);
    assert_eq!(invalid_vector, 0);
}
/// A `GuestPageWalkInfo` keeps the values it was constructed with.
#[test]
fn test_page_walk_info_struct() {
    let walk = GuestPageWalkInfo {
        top_entry: 0x1000,
        level: 4,
        width: 9,
        pse: true,
        wp: true,
        nxe: true,
        is_user_mode_access: false,
        is_write_access: false,
        is_inst_fetch: false,
        is_smap_on: false,
        is_smep_on: false,
    };

    let GuestPageWalkInfo {
        top_entry,
        level,
        width,
        ..
    } = walk;
    assert_eq!(level, 4);
    assert_eq!(width, 9);
    assert_eq!(top_entry, 0x1000);
}
/// Pins the numeric values of a few CPUID leaf numbers and feature bits.
#[test]
fn test_cpuid_constants() {
    const LEAF_FEATURE_INFO: u32 = 0x1;
    const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000;
    const FEATURE_VMX: u32 = 1 << 5;
    const FEATURE_HYPERVISOR: u32 = 1 << 31;

    let expectations: [(u32, u32); 4] = [
        (LEAF_FEATURE_INFO, 1),
        (LEAF_HYPERVISOR_INFO, 0x40000000),
        (FEATURE_VMX, 32),
        (FEATURE_HYPERVISOR, 0x80000000),
    ];
    for (actual, expected) in expectations {
        assert_eq!(actual, expected);
    }
}
/// Control-register flag sets report exactly the flags that were combined.
#[test]
fn test_cr_flags_operations() {
    use x86_64::registers::control::{Cr0Flags, Cr4Flags};

    let cr0_flags = Cr0Flags::PAGING | Cr0Flags::PROTECTED_MODE_ENABLE;
    for expected in [Cr0Flags::PAGING, Cr0Flags::PROTECTED_MODE_ENABLE] {
        assert!(cr0_flags.contains(expected));
    }
    assert!(!cr0_flags.contains(Cr0Flags::CACHE_DISABLE));

    let cr4_flags = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS | Cr4Flags::PAGE_SIZE_EXTENSION;
    for expected in [
        Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS,
        Cr4Flags::PAGE_SIZE_EXTENSION,
    ] {
        assert!(cr4_flags.contains(expected));
    }
}
/// `AccessWidth` discriminants are the log2 of the access size in bytes,
/// and `try_from` maps a byte count back to the matching variant.
#[test]
fn test_access_width_operations() {
    use axvcpu::AccessWidth;

    let by_discriminant = [
        (AccessWidth::Byte, 0usize),
        (AccessWidth::Word, 1),
        (AccessWidth::Dword, 2),
        (AccessWidth::Qword, 3),
    ];
    for (width, discriminant) in by_discriminant {
        assert_eq!(width as usize, discriminant);
    }

    let by_byte_count = [
        (1usize, AccessWidth::Byte),
        (2, AccessWidth::Word),
        (4, AccessWidth::Dword),
        (8, AccessWidth::Qword),
    ];
    for (bytes, width) in by_byte_count {
        assert_eq!(AccessWidth::try_from(bytes), Ok(width));
    }
}
}
/// Exercises `get_tr_base` against an in-memory descriptor table.
///
/// The previous version re-implemented the bit arithmetic inline and never
/// called the function under test.
#[test]
fn test_get_tr_base_logic() {
    const PRESENT: u64 = 1 << 47;
    const TSS_BASE: u64 = 0xffff_8000_1234_5678;

    // Scatter the base address over a 16-byte system descriptor:
    // base[0..24] -> bits 16..40, base[24..32] -> bits 56..64 of the first
    // slot, base[32..64] -> low half of the second slot.
    let low = PRESENT | ((TSS_BASE & 0x00ff_ffff) << 16) | (((TSS_BASE >> 24) & 0xff) << 56);
    let high = TSS_BASE >> 32;

    // Slot 0 is the null descriptor; the TSS descriptor occupies slots 1-2.
    let table = [0u64, low, high];
    let gdt = DescriptorTablePointer {
        limit: (core::mem::size_of_val(&table) - 1) as u16,
        base: table.as_ptr(),
    };
    // A selector's index lives in bits 3.. of its raw value.
    let tr = SegmentSelector::from_raw(1 << 3);
    assert_eq!(get_tr_base(tr, &gdt), TSS_BASE);

    // A descriptor whose present bit is clear yields a zero base.
    let absent = [0u64; 3];
    let gdt = DescriptorTablePointer {
        limit: (core::mem::size_of_val(&absent) - 1) as u16,
        base: absent.as_ptr(),
    };
    assert_eq!(get_tr_base(SegmentSelector::from_raw(1 << 3), &gdt), 0);
}
/// `VmxExitReason` values can be matched by variant.
///
/// Uses `matches!` so a failure reports the actual expression instead of a
/// bare `assert!(false)`.
#[test]
fn test_vmx_exit_reason_enum() {
    let test_reason = VmxExitReason::VMCALL;
    assert!(matches!(test_reason, VmxExitReason::VMCALL));
}
/// `Debug` output for the CPU mode and the register file is non-empty.
#[test]
fn test_debug_implementations() {
    let mode_repr = format!("{:?}", VmCpuMode::Mode64);
    assert!(!mode_repr.is_empty());

    let regs_repr = format!("{:?}", GeneralRegisters::default());
    assert!(!regs_repr.is_empty());
}
}