use alloc::{collections::BTreeMap, vec::Vec};
use ax_cpumask::CpuMask;
use core::{
cell::UnsafeCell,
sync::atomic::{AtomicUsize, Ordering},
time::Duration,
};
use std::os::arceos::{
api::task::{AxCpuMask, ax_wait_queue_wake},
modules::{
ax_hal::{self, time::busy_wait},
ax_task::{self, AxTaskExt},
},
};
use ax_task::{AxTaskRef, TaskInner, WaitQueue};
use axaddrspace::GuestPhysAddr;
use axvcpu::{AxVCpuExitReason, VCpuState};
use crate::{hal::arch::inject_interrupt, task::VCpuTask};
use crate::{
task::AsVCpuTask,
vmm::{VCpuRef, VMRef, sub_running_vm_count},
};
/// Kernel stack size (in bytes) for each spawned VCpu task: 0x40000 = 256 KiB.
const KERNEL_STACK_SIZE: usize = 0x40000;
/// Global registry mapping VM id -> per-VM VCpu bookkeeping (wait queue,
/// task list, running-count). Entries are created in `setup_vm_primary_vcpu`
/// and removed in `cleanup_vm_vcpus`.
static VM_VCPU_TASK_WAIT_QUEUE: Queue = Queue::new();
/// A `BTreeMap<vm_id, VMVCpus>` wrapped in an `UnsafeCell` so it can live in
/// a `static` and be mutated through shared references.
struct Queue(UnsafeCell<BTreeMap<usize, VMVCpus>>);
// SAFETY: NOTE(review) — these impls assert data-race freedom that the type
// itself does not enforce. Presumably map mutations (insert/remove/get_mut)
// only happen in serialized phases (VM setup/teardown) — confirm against the
// callers before trusting this.
unsafe impl Sync for Queue {}
unsafe impl Send for Queue {}
impl Queue {
    /// Creates an empty registry; `const` so it can initialize a `static`.
    const fn new() -> Self {
        Self(UnsafeCell::new(BTreeMap::new()))
    }
    /// Returns a shared reference to VM `vm_id`'s entry, if present.
    // NOTE(review): dereferencing the UnsafeCell assumes no concurrent
    // mutation of the map while this reference is alive.
    fn get(&self, vm_id: &usize) -> Option<&VMVCpus> {
        unsafe { (*self.0.get()).get(vm_id) }
    }
    /// Returns a mutable reference to VM `vm_id`'s entry, if present.
    // NOTE(review): handing out `&mut` from `&self` allows aliased mutable
    // references if two callers overlap; this is sound only if calls are
    // externally serialized — confirm the execution model.
    #[allow(clippy::mut_from_ref)]
    fn get_mut(&self, vm_id: &usize) -> Option<&mut VMVCpus> {
        unsafe { (*self.0.get()).get_mut(vm_id) }
    }
    /// Inserts (or replaces) the bookkeeping entry for VM `vm_id`.
    fn insert(&self, vm_id: usize, vcpus: VMVCpus) {
        unsafe {
            (*self.0.get()).insert(vm_id, vcpus);
        }
    }
    /// Removes and returns VM `vm_id`'s entry, if present.
    fn remove(&self, vm_id: &usize) -> Option<VMVCpus> {
        unsafe { (*self.0.get()).remove(vm_id) }
    }
}
/// Per-VM bookkeeping for its VCpu tasks.
pub struct VMVCpus {
    // VM id, retained for debugging only (hence the leading underscore).
    _vm_id: usize,
    // Queue on which this VM's VCpu tasks block (boot gate, Halt, CpuDown,
    // suspend — see `vcpu_run`).
    wait_queue: WaitQueue,
    // One scheduler task per spawned VCpu; indexed by VCpu id in
    // `with_vcpu_task`.
    vcpu_task_list: Vec<AxTaskRef>,
    // Count of VCpus currently running/halting; the task that drops it to
    // zero performs the VM's final teardown (see `mark_vcpu_exiting`).
    running_halting_vcpu_count: AtomicUsize,
}
impl VMVCpus {
    /// Creates an empty bookkeeping structure for `vm`, with the task list
    /// pre-sized to the VM's configured VCpu count.
    fn new(vm: VMRef) -> Self {
        Self {
            _vm_id: vm.id(),
            wait_queue: WaitQueue::new(),
            vcpu_task_list: Vec::with_capacity(vm.vcpu_num()),
            running_halting_vcpu_count: AtomicUsize::new(0),
        }
    }
    /// Registers a spawned VCpu task with this VM.
    fn add_vcpu_task(&mut self, vcpu_task: AxTaskRef) {
        self.vcpu_task_list.push(vcpu_task);
    }
    /// Blocks the current task on this VM's wait queue.
    fn wait(&self) {
        self.wait_queue.wait()
    }
    /// Blocks the current task until `condition` returns true.
    fn wait_until<F>(&self, condition: F)
    where
        F: Fn() -> bool,
    {
        self.wait_queue.wait_until(condition)
    }
    /// Wakes a single task waiting on this VM's wait queue.
    #[allow(dead_code)]
    fn notify_one(&mut self) {
        self.wait_queue.notify_one(false);
    }
    /// Wakes every task waiting on this VM's wait queue.
    fn notify_all(&mut self) {
        self.wait_queue.notify_all(false);
    }
    /// Records that one more VCpu has entered the running state.
    fn mark_vcpu_running(&self) {
        // Relaxed suffices for the increment: no data is published alongside
        // it; synchronization happens on the matching decrement below.
        self.running_halting_vcpu_count
            .fetch_add(1, Ordering::Relaxed);
    }
    /// Records that a VCpu is exiting; returns `true` iff it was the last
    /// running VCpu of this VM (the caller then performs final teardown).
    fn mark_vcpu_exiting(&self) -> bool {
        // AcqRel instead of Relaxed: the Release half publishes everything
        // this VCpu did before exiting, and the Acquire half makes the last
        // decrementer observe all prior VCpus' work before it tears the VM
        // down — the same pattern as the strong-count drop in `Arc`.
        self.running_halting_vcpu_count
            .fetch_sub(1, Ordering::AcqRel)
            == 1
    }
}
/// Blocks the current VCpu task on VM `vm_id`'s wait queue.
///
/// Panics if the VM has no entry in the global registry.
fn wait(vm_id: usize) {
    let vcpus = VM_VCPU_TASK_WAIT_QUEUE.get(&vm_id).unwrap();
    vcpus.wait()
}
/// Blocks the current VCpu task on VM `vm_id`'s wait queue until `condition`
/// evaluates to true.
///
/// Panics if the VM has no entry in the global registry.
fn wait_for<F>(vm_id: usize, condition: F)
where
    F: Fn() -> bool,
{
    let vcpus = VM_VCPU_TASK_WAIT_QUEUE.get(&vm_id).unwrap();
    vcpus.wait_until(condition)
}
/// Wakes one task waiting on VM `vm_id`'s queue (used to release the primary
/// VCpu once the VM may boot).
///
/// Panics if the VM has no entry in the global registry.
pub(crate) fn notify_primary_vcpu(vm_id: usize) {
    let vcpus = VM_VCPU_TASK_WAIT_QUEUE.get_mut(&vm_id).unwrap();
    vcpus.notify_one()
}
/// Wakes every task waiting on VM `vm_id`'s queue. Silently does nothing if
/// the VM has no entry in the global registry.
pub(crate) fn notify_all_vcpus(vm_id: usize) {
    match VM_VCPU_TASK_WAIT_QUEUE.get_mut(&vm_id) {
        Some(vcpus) => vcpus.notify_all(),
        None => {}
    }
}
/// Removes VM `vm_id`'s entry from the global registry and joins each of its
/// VCpu tasks in turn, logging progress; warns if no entry exists.
pub(crate) fn cleanup_vm_vcpus(vm_id: usize) {
    let Some(vm_vcpus) = VM_VCPU_TASK_WAIT_QUEUE.remove(&vm_id) else {
        warn!("VM[{}] VCpu resources not found in queue", vm_id);
        return;
    };
    let total = vm_vcpus.vcpu_task_list.len();
    info!("VM[{}] Joining {} VCpu tasks...", vm_id, total);
    for (i, task) in vm_vcpus.vcpu_task_list.iter().enumerate() {
        debug!("VM[{}] Joining VCpu task[{}]: {}", vm_id, i, task.id_name());
        // `join` blocks until the task has fully exited.
        let code = task.join();
        debug!("VM[{}] VCpu task[{}] exited with code: {}", vm_id, i, code);
    }
    info!(
        "VM[{}] VCpu resources cleaned up, {} VCpu tasks joined successfully",
        vm_id, total
    );
}
/// Bumps the running-VCpu counter for VM `vm_id`.
///
/// Panics if the VM has no entry in the global registry.
fn mark_vcpu_running(vm_id: usize) {
    let vcpus = VM_VCPU_TASK_WAIT_QUEUE.get(&vm_id).unwrap();
    vcpus.mark_vcpu_running();
}
/// Decrements the running-VCpu counter for VM `vm_id`; returns `true` iff the
/// caller was the VM's last running VCpu.
///
/// Panics if the VM has no entry in the global registry.
fn mark_vcpu_exiting(vm_id: usize) -> bool {
    let vcpus = VM_VCPU_TASK_WAIT_QUEUE.get(&vm_id).unwrap();
    vcpus.mark_vcpu_exiting()
}
/// Prepares VCpu `vcpu_id` of `vm` for first entry at `entry_point` with boot
/// argument `arg`, spawns its task, and registers the task with the VM.
fn vcpu_on(vm: VMRef, vcpu_id: usize, entry_point: GuestPhysAddr, arg: usize) {
    let target_vcpu = vm.vcpu_list()[vcpu_id].clone();
    // A VCpu may only be powered on from the Free state.
    assert_eq!(
        target_vcpu.state(),
        VCpuState::Free,
        "vcpu_on: {} invalid vcpu state {:?}",
        target_vcpu.id(),
        target_vcpu.state()
    );
    target_vcpu
        .set_entry(entry_point)
        .expect("vcpu_on: set_entry failed");
    #[cfg(not(target_arch = "riscv64"))]
    target_vcpu.set_gpr(0, arg);
    #[cfg(target_arch = "riscv64")]
    {
        info!(
            "vcpu_on: vcpu[{}] entry={:x} opaque={:x}",
            vcpu_id, entry_point, arg
        );
        // RISC-V SBI HSM convention: A0 = hartid, A1 = opaque argument.
        target_vcpu.set_gpr(riscv_vcpu::GprIndex::A0 as usize, vcpu_id);
        target_vcpu.set_gpr(riscv_vcpu::GprIndex::A1 as usize, arg);
    }
    let task = alloc_vcpu_task(&vm, target_vcpu);
    VM_VCPU_TASK_WAIT_QUEUE
        .get_mut(&vm.id())
        .unwrap()
        .add_vcpu_task(task);
}
/// Creates the bookkeeping entry for `vm` and spawns the task for its primary
/// VCpu (id 0); secondary VCpus are brought up later via `vcpu_on`.
pub fn setup_vm_primary_vcpu(vm: VMRef) {
    info!("Initializing VM[{}]'s {} vcpus", vm.id(), vm.vcpu_num());
    let vm_id = vm.id();
    let mut vcpus = VMVCpus::new(vm.clone());
    // VCpu 0 is always the primary.
    let primary_vcpu = vm.vcpu_list()[0].clone();
    let task = alloc_vcpu_task(&vm, primary_vcpu);
    vcpus.add_vcpu_task(task);
    VM_VCPU_TASK_WAIT_QUEUE.insert(vm_id, vcpus);
}
/// Runs `f` against the task of VCpu `vcpu_id` in VM `vm_id`.
///
/// Returns `None` when the VM has no entry in the global registry or when
/// `vcpu_id` is out of range — consistent with the `Option` return type and
/// with `notify_all_vcpus`, instead of panicking on an unknown `vm_id` as the
/// previous `unwrap` did.
pub fn with_vcpu_task<T, F: FnOnce(&AxTaskRef) -> T>(
    vm_id: usize,
    vcpu_id: usize,
    f: F,
) -> Option<T> {
    VM_VCPU_TASK_WAIT_QUEUE
        .get(&vm_id)?
        .vcpu_task_list
        .get(vcpu_id)
        .map(f)
}
/// Builds and spawns the scheduler task that runs `vcpu` for `vm`.
///
/// The task executes `vcpu_run`, is pinned to the VCpu's physical CPU set
/// when one is configured, and carries a `VCpuTask` extension holding the
/// VM/VCpu references.
fn alloc_vcpu_task(vm: &VMRef, vcpu: VCpuRef) -> AxTaskRef {
    info!("Spawning task for VM[{}] VCpu[{}]", vm.id(), vcpu.id());
    let name = format!("VM[{}]-VCpu[{}]", vm.id(), vcpu.id());
    let mut task = TaskInner::new(vcpu_run, name, KERNEL_STACK_SIZE);
    // Honor an explicit physical-CPU affinity if the VCpu has one.
    if let Some(mask) = vcpu.phys_cpu_set() {
        task.set_cpumask(AxCpuMask::from_raw_bits(mask));
    }
    *task.task_ext_mut() = Some(AxTaskExt::from_impl(VCpuTask::new(vm, vcpu)));
    info!("VCpu task {} created {:?}", task.id_name(), task.cpumask());
    ax_task::spawn_task(task)
}
/// Entry point of every VCpu task.
///
/// Waits until the VM is marked running, then repeatedly enters the VCpu and
/// dispatches on each VM-exit reason. The loop ends when the VM is stopping;
/// the last VCpu to exit flips the VM to `Stopped`, decrements the running-VM
/// count, and wakes the VMM.
fn vcpu_run() {
    let curr = ax_task::current();
    let vm = curr.as_vcpu_task().vm();
    let vcpu = curr.as_vcpu_task().vcpu.clone();
    let vm_id = vm.id();
    let vcpu_id = vcpu.id();
    // Stagger guest boots by 5 s per VM id. `saturating_sub` avoids an
    // underflow for vm_id == 0 (which would panic in debug builds or
    // busy-wait for ~usize::MAX * 5 seconds in release builds).
    let boot_delay_sec = vm_id.saturating_sub(1) * 5;
    info!("VM[{vm_id}] boot delay: {boot_delay_sec}s");
    busy_wait(Duration::from_secs(boot_delay_sec as _));
    info!("VM[{}] VCpu[{}] waiting for running", vm.id(), vcpu.id());
    // Block until the VM has been started (see `notify_primary_vcpu`).
    wait_for(vm_id, || vm.running());
    info!("VM[{}] VCpu[{}] running...", vm.id(), vcpu.id());
    mark_vcpu_running(vm_id);
    loop {
        match vm.run_vcpu(vcpu_id) {
            Ok(exit_reason) => match exit_reason {
                // Guest issued a hypercall; execute it and write the result
                // back into the guest's return register.
                AxVCpuExitReason::Hypercall { nr, args } => {
                    debug!("Hypercall [{nr}] args {args:x?}");
                    use crate::vmm::hvc::HyperCall;
                    match HyperCall::new(vcpu.clone(), vm.clone(), nr, args) {
                        Ok(hypercall) => {
                            let ret_val = match hypercall.execute() {
                                Ok(ret_val) => ret_val as isize,
                                Err(err) => {
                                    warn!("Hypercall [{nr:#x}] failed: {err:?}");
                                    -1
                                }
                            };
                            vcpu.set_return_value(ret_val as usize);
                        }
                        Err(err) => {
                            warn!("Hypercall [{nr:#x}] failed: {err:?}");
                        }
                    }
                }
                // Hardware refused VM entry; log and retry.
                AxVCpuExitReason::FailEntry {
                    hardware_entry_failure_reason,
                } => {
                    warn!(
                        "VM[{vm_id}] VCpu[{vcpu_id}] run failed with exit code {hardware_entry_failure_reason}"
                    );
                }
                // Host interrupt arrived while the guest was running; forward
                // it to the host IRQ handler and service any expired timers.
                AxVCpuExitReason::ExternalInterrupt { vector } => {
                    debug!("VM[{vm_id}] run VCpu[{vcpu_id}] get irq {vector}");
                    ax_hal::trap::irq_handler(vector as usize);
                    super::timer::check_events();
                    #[cfg(target_arch = "riscv64")]
                    {
                        vcpu.get_arch_vcpu().latch_hvip_from_hw();
                    }
                }
                // Guest halted; sleep until another VCpu/event wakes us.
                AxVCpuExitReason::Halt => {
                    debug!("VM[{vm_id}] run VCpu[{vcpu_id}] Halt");
                    wait(vm_id)
                }
                AxVCpuExitReason::Nothing => {}
                // Guest powered this VCpu down; park the task on the queue.
                AxVCpuExitReason::CpuDown { _state } => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] CpuDown state {_state:#x}");
                    wait(vm_id)
                }
                // Guest requested a secondary CPU bring-up: map the physical
                // CPU id back to a VCpu id and power that VCpu on.
                AxVCpuExitReason::CpuUp {
                    target_cpu,
                    entry_point,
                    arg,
                } => {
                    info!(
                        "VM[{vm_id}]'s VCpu[{vcpu_id}] try to boot target_cpu [{target_cpu}] entry_point={entry_point:x} arg={arg:#x}"
                    );
                    let vcpu_mappings = vm.get_vcpu_affinities_pcpu_ids();
                    let target_vcpu_id = vcpu_mappings
                        .iter()
                        .find_map(|(vcpu_id, _, phys_id)| {
                            if *phys_id == target_cpu as usize {
                                Some(*vcpu_id)
                            } else {
                                None
                            }
                        })
                        .unwrap_or_else(|| {
                            panic!("Physical CPU ID {target_cpu} not found in VM configuration",)
                        });
                    vcpu_on(vm.clone(), target_vcpu_id, entry_point, arg as _);
                    // Report success (0) back to the caller's return register.
                    #[cfg(not(target_arch = "riscv64"))]
                    vcpu.set_gpr(0, 0);
                    #[cfg(target_arch = "riscv64")]
                    vcpu.set_gpr(riscv_vcpu::GprIndex::A0 as usize, 0);
                }
                // Guest asked to shut the whole VM down.
                AxVCpuExitReason::SystemDown => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] SystemDown");
                    vm.shutdown().expect("VM shutdown failed");
                    // Wake parked VCpus so they can observe the stop and exit.
                    notify_all_vcpus(vm_id);
                }
                // Guest sent an IPI; inject locally or route to the target.
                AxVCpuExitReason::SendIPI {
                    target_cpu,
                    target_cpu_aux,
                    send_to_all,
                    send_to_self,
                    vector,
                } => {
                    debug!(
                        "VM[{vm_id}] run VCpu[{vcpu_id}] SendIPI, target_cpu={target_cpu:#x}, target_cpu_aux={target_cpu_aux:#x}, vector={vector}",
                    );
                    if send_to_all {
                        unimplemented!("Send IPI to all CPUs is not implemented yet");
                    }
                    if target_cpu == vcpu_id as u64 || send_to_self {
                        inject_interrupt(vector as _);
                    } else {
                        vm.inject_interrupt_to_vcpu(
                            CpuMask::one_shot(target_cpu as _),
                            vector as _,
                        )
                        .unwrap();
                    }
                }
                e => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] unhandled vmexit: {e:?}");
                }
            },
            // Unrecoverable run error: stop the VM and wake everyone.
            Err(err) => {
                error!("VM[{vm_id}] run VCpu[{vcpu_id}] get error {err:?}");
                vm.shutdown().expect("VM shutdown failed");
                notify_all_vcpus(vm_id);
            }
        }
        // Suspend: park until the VM is resumed, then re-enter the loop.
        if vm.suspending() {
            debug!(
                "VM[{}] VCpu[{}] is suspended, waiting for resume...",
                vm_id, vcpu_id
            );
            wait_for(vm_id, || !vm.suspending());
            info!("VM[{}] VCpu[{}] resumed from suspend", vm_id, vcpu_id);
            continue;
        }
        // Stop: leave the loop; the last VCpu out finalizes the VM state.
        if vm.stopping() {
            warn!(
                "VM[{}] VCpu[{}] stopping because of VM stopping",
                vm_id, vcpu_id
            );
            if mark_vcpu_exiting(vm_id) {
                info!("VM[{vm_id}] VCpu[{vcpu_id}] last VCpu exiting, decreasing running VM count");
                vm.set_vm_status(axvm::VMStatus::Stopped);
                info!("VM[{}] state changed to Stopped", vm_id);
                sub_running_vm_count(1);
                ax_wait_queue_wake(&super::VMM, 1);
            }
            break;
        }
    }
    info!("VM[{}] VCpu[{}] exiting...", vm_id, vcpu_id);
}