axvm 0.5.11

Virtual Machine resource management crate for ArceOS's hypervisor variant.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
// Copyright 2025 The Axvisor Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use alloc::{collections::BTreeMap, format, sync::Arc, vec::Vec};
use core::sync::atomic::{AtomicUsize, Ordering};

use ax_errno::{AxResult, ax_err_type};
use ax_kspin::SpinNoIrq as Mutex;
#[cfg(target_arch = "riscv64")]
use riscv_vcpu::GprIndex as RiscvGprIndex;

use crate::{
    AsVCpuTask, AxVCpuExitReason, CpuMask, GuestPhysAddr, VCpuState, VCpuTask,
    runtime::{VCpuRef, VMRef, sub_running_vm_count},
};

/// Kernel stack size, in bytes, passed to `TaskInner::new` for every vCPU task.
const KERNEL_STACK_SIZE: usize = 0x40000; // 256 KiB

/// A global map that holds the vCPU task state for each VM, keyed by VM ID.
///
/// Entries are inserted by `setup_vm_primary_vcpu` and removed by `cleanup_vm_vcpus`.
static VM_VCPU_TASKS: Mutex<BTreeMap<usize, Arc<VMVCpus>>> = Mutex::new(BTreeMap::new());

fn get_vm_vcpus(vm_id: usize) -> Option<Arc<VMVCpus>> {
    VM_VCPU_TASKS.lock().get(&vm_id).cloned()
}

/// A structure representing the VCpus of a specific VM, including a wait queue
/// and a list of tasks associated with the VCpus.
///
/// One instance exists per VM; it is shared through the global `VM_VCPU_TASKS` map.
pub struct VMVCpus {
    /// The ID of the VM to which these VCpus belong.
    _vm_id: usize,
    /// A wait queue on which this VM's vCPU tasks block (while waiting for the VM to run,
    /// to resume from suspend, or after a halt / CPU-down exit).
    wait_queue: crate::WaitQueue,
    /// A map of tasks associated with the VCpus of this VM, keyed by vCPU ID.
    vcpu_task_list: Mutex<BTreeMap<usize, crate::AxTaskRef>>,
    /// Pending virtual interrupts that must be injected by the owning vCPU task,
    /// keyed by vCPU ID; each value is the list of queued vectors.
    pending_interrupts: Mutex<BTreeMap<usize, Vec<usize>>>,
    /// The number of currently running or halting VCpus. Used to track when the VM is fully
    /// shutdown.
    ///
    /// This number is incremented when a VCpu starts running and decremented when it exits because
    /// of the VM being shutdown.
    running_halting_vcpu_count: AtomicUsize,
}

impl VMVCpus {
    /// Builds the per-VM vCPU bookkeeping for `vm`.
    ///
    /// # Arguments
    ///
    /// * `vm` - The VM that owns the vCPUs; only its ID is recorded.
    ///
    /// # Returns
    ///
    /// A `VMVCpus` with no tasks, no pending interrupts, a fresh wait queue and a
    /// running-vCPU count of zero.
    fn new(vm: VMRef) -> Self {
        let owner_id = vm.id();
        Self {
            _vm_id: owner_id,
            wait_queue: crate::WaitQueue::new(),
            vcpu_task_list: Mutex::new(BTreeMap::new()),
            pending_interrupts: Mutex::new(BTreeMap::new()),
            running_halting_vcpu_count: AtomicUsize::new(0),
        }
    }

    /// Registers the task that drives vCPU `vcpu_id`.
    ///
    /// # Arguments
    ///
    /// * `vcpu_id` - The ID of the vCPU the task belongs to.
    /// * `vcpu_task` - The task handle to store; a task already stored for the same ID is
    ///   replaced.
    ///
    /// Also makes sure a pending-interrupt queue exists for the vCPU.
    fn add_vcpu_task(&self, vcpu_id: usize, vcpu_task: crate::AxTaskRef) {
        let mut tasks = self.vcpu_task_list.lock();
        tasks.insert(vcpu_id, vcpu_task);
        drop(tasks);

        let mut queues = self.pending_interrupts.lock();
        queues.entry(vcpu_id).or_default();
    }

    /// Appends `vector` to the pending-interrupt queue of vCPU `vcpu_id`.
    ///
    /// # Returns
    ///
    /// The CPU ID reported by the vCPU's task (`task.cpu_id()`), so the caller can kick that
    /// CPU.
    ///
    /// # Errors
    ///
    /// Returns `NotFound` when no task has been registered for `vcpu_id`; nothing is queued
    /// in that case.
    fn queue_interrupt(&self, vcpu_id: usize, vector: usize) -> AxResult<usize> {
        let owner = match self.vcpu_task_list.lock().get(&vcpu_id) {
            Some(task) => task.clone(),
            None => {
                return Err(ax_err_type!(
                    NotFound,
                    format!("vCPU {vcpu_id} task not found")
                ));
            }
        };

        let mut queues = self.pending_interrupts.lock();
        let queue = queues.entry(vcpu_id).or_default();
        queue.push(vector);
        Ok(owner.cpu_id() as usize)
    }

    /// Takes every queued interrupt vector for vCPU `vcpu_id`, leaving its queue empty.
    ///
    /// Returns an empty vector when the vCPU has no queue.
    fn drain_pending_interrupts(&self, vcpu_id: usize) -> Vec<usize> {
        let mut queues = self.pending_interrupts.lock();
        match queues.get_mut(&vcpu_id) {
            Some(queue) => core::mem::take(queue),
            None => Vec::default(),
        }
    }

    /// Blocks the calling task on this VM's vCPU wait queue.
    fn wait(&self) {
        self.wait_queue.wait()
    }

    /// Blocks the calling task on this VM's vCPU wait queue until `condition` holds
    /// (delegates to `WaitQueue::wait_until`).
    fn wait_until<F: Fn() -> bool>(&self, condition: F) {
        self.wait_queue.wait_until(condition)
    }

    /// Wakes a single task blocked on this VM's vCPU wait queue.
    #[allow(dead_code)]
    fn notify_one(&self) {
        // FIXME: `WaitQueue::len` is removed, so the queue length can no longer be logged here.
        // NOTE(review): the `false` argument is presumably the "reschedule now" flag — confirm.
        self.wait_queue.notify_one(false);
    }

    /// Wakes every task blocked on this VM's vCPU wait queue.
    ///
    /// Used on shutdown so that all waiting vCPUs get a chance to re-check the VM state.
    fn notify_all(&self) {
        self.wait_queue.notify_all(false);
    }

    /// Records that one more vCPU is running or halting.
    fn mark_vcpu_running(&self) {
        // Only the counter itself must be consistent; no other data is published through it,
        // so relaxed ordering is sufficient.
        self.running_halting_vcpu_count
            .fetch_add(1, Ordering::Relaxed);
    }

    /// Records that one vCPU is exiting.
    ///
    /// Returns `true` when the caller was the last running vCPU, i.e. the counter was `1`
    /// before the decrement. A counter that is already zero is left untouched and yields
    /// `false`.
    fn mark_vcpu_exiting(&self) -> bool {
        // Relaxed for the same reason as in `mark_vcpu_running`: the counter synchronizes
        // nothing but itself.
        let previous = self.running_halting_vcpu_count.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |count| count.checked_sub(1),
        );
        matches!(previous, Ok(1))
    }
}

/// Blocks the current thread until it is explicitly woken up, using the wait queue
/// associated with the VCpus of the specified VM.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM whose VCpu wait queue is used to block the current thread.
fn wait(vm_vcpus: &VMVCpus) {
    vm_vcpus.wait();
}

/// Blocks the current thread until the provided condition is met, using the wait queue
/// associated with the VCpus of the specified VM.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM whose VCpu wait queue is used to block the current thread.
/// * `condition` - A closure that returns a boolean value indicating whether the condition is met.
fn wait_for<F>(vm_vcpus: &VMVCpus, condition: F)
where
    F: Fn() -> bool,
{
    vm_vcpus.wait_until(condition);
}

/// Wakes the primary vCPU task of a VM so it can start executing once the VM has booted.
///
/// Exactly one waiter on the VM's vCPU wait queue is woken; a warning is logged when the VM
/// has no registered vCPU resources.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM whose primary vCPU should be woken.
pub(crate) fn notify_primary_vcpu(vm_id: usize) {
    // At boot time the primary vCPU is generally the first and **only** task on the queue,
    // so waking a single waiter reaches it.
    match get_vm_vcpus(vm_id) {
        Some(vm_vcpus) => vm_vcpus.notify_one(),
        None => warn!("VM[{vm_id}] vCPU resources not found"),
    }
}

/// Wakes every vCPU task of a VM that is blocked on the VM's vCPU wait queue.
///
/// Used when shutting a VM down so that waiting vCPUs re-check the VM state. Does nothing
/// when the VM has no registered vCPU resources.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM whose vCPUs should be woken.
pub(crate) fn notify_all_vcpus(vm_id: usize) {
    let Some(vm_vcpus) = get_vm_vcpus(vm_id) else {
        return;
    };
    vm_vcpus.notify_all();
}

/// Queues a virtual interrupt for a vCPU and kicks the task that owns it.
///
/// The vector is appended to the vCPU's pending queue, all waiters on the VM's vCPU wait
/// queue are woken, and an IPI is sent to the CPU reported by the vCPU's task.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM that owns the target vCPU.
/// * `vcpu_id` - The ID of the vCPU that must receive the interrupt.
/// * `vector` - The interrupt vector to queue.
///
/// # Errors
///
/// Returns `NotFound` when the VM has no registered vCPU resources or the vCPU has no task.
pub(crate) fn queue_interrupt(vm_id: usize, vcpu_id: usize, vector: usize) -> AxResult {
    let Some(vm_vcpus) = get_vm_vcpus(vm_id) else {
        return Err(ax_err_type!(
            NotFound,
            format!("VM[{vm_id}] vCPU resources not found")
        ));
    };

    let owner_cpu = vm_vcpus.queue_interrupt(vcpu_id, vector)?;
    vm_vcpus.notify_all();
    crate::host::task::send_ipi(owner_cpu);
    Ok(())
}

/// Injects every interrupt queued for a vCPU into that vCPU.
///
/// The pending queue is drained first; each vector is then passed to
/// `vcpu.inject_interrupt`. A failed injection is logged and the remaining vectors are still
/// processed. When the VM has no registered vCPU resources a warning is logged and nothing
/// is injected.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM that owns the vCPU.
/// * `vcpu_id` - The ID of the vCPU whose queue is drained.
/// * `vcpu` - The vCPU that receives the interrupts.
pub(crate) fn inject_pending_interrupts(vm_id: usize, vcpu_id: usize, vcpu: &VCpuRef) {
    let vm_vcpus = match get_vm_vcpus(vm_id) {
        Some(vm_vcpus) => vm_vcpus,
        None => {
            warn!("VM[{vm_id}] vCPU resources not found, cannot drain VCpu[{vcpu_id}] interrupts");
            return;
        }
    };

    let queued = vm_vcpus.drain_pending_interrupts(vcpu_id);
    for vector in queued {
        trace!("Injecting queued interrupt {vector:#x} into VM[{vm_id}] VCpu[{vcpu_id}]");
        match vcpu.inject_interrupt(vector) {
            Ok(_) => {}
            Err(err) => {
                warn!(
                    "Failed to inject queued interrupt {vector:#x} into VM[{vm_id}] VCpu[{vcpu_id}]: \
                     {err:?}"
                );
            }
        }
    }
}

/// Computes the set of vCPUs targeted by a guest `SendIPI` request.
///
/// # Arguments
///
/// * `vm` - The VM whose vCPUs may be targeted.
/// * `current_vcpu_id` - The ID of the vCPU that issued the request.
/// * `target_cpu` - Guest-supplied target. On aarch64 it is compared against the upper
///   affinity bits of each vCPU's physical ID; on other architectures it is used directly as
///   the target vCPU ID.
/// * `target_cpu_aux` - On aarch64, a bitmask over the low affinity byte (bits 0..16) that
///   selects vCPUs inside the cluster named by `target_cpu`. Ignored elsewhere.
/// * `send_to_all` - Target every vCPU of the VM except the sender.
/// * `send_to_self` - Target only the sender (checked after `send_to_all`).
///
/// # Returns
///
/// A mask with one bit per targeted vCPU ID. The mask is empty when nothing matches,
/// including when a guest-supplied `target_cpu` does not fit into the mask.
fn ipi_targets(
    vm: &VMRef,
    current_vcpu_id: usize,
    target_cpu: u64,
    target_cpu_aux: u64,
    send_to_all: bool,
    send_to_self: bool,
) -> CpuMask<64> {
    let mut targets = CpuMask::new();

    if send_to_all {
        // "All" means all others: the sender is excluded.
        for vcpu in vm.vcpu_list() {
            if vcpu.id() != current_vcpu_id {
                targets.set(vcpu.id(), true);
            }
        }
    } else if send_to_self {
        targets.set(current_vcpu_id, true);
    } else {
        #[cfg(target_arch = "aarch64")]
        {
            // NOTE(review): this looks like GICv3 SGI routing (Aff3.Aff2.Aff1 + 16-bit target
            // list) — confirm against the vGIC emulation.
            for (vcpu_id, _, phys_id) in vm.get_vcpu_affinities_pcpu_ids() {
                let affinity = phys_id as u64;
                let aff0 = affinity & 0xff;
                let aff123 = affinity & !0xff;
                // `aff0 < 16` also keeps the shift below in range.
                if aff123 == target_cpu && aff0 < 16 && (target_cpu_aux & (1u64 << aff0)) != 0 {
                    targets.set(vcpu_id, true);
                }
            }
        }

        #[cfg(not(target_arch = "aarch64"))]
        {
            /// Number of bits in the returned mask; must match the `CpuMask<64>` return type.
            const MASK_BITS: usize = 64;

            let _ = target_cpu_aux;
            // `target_cpu` comes from the guest. An index outside the mask must never reach
            // `CpuMask::set`, so out-of-range targets are dropped; the caller then sees an
            // empty mask and reports that the IPI has no target.
            match usize::try_from(target_cpu) {
                Ok(target) if target < MASK_BITS => {
                    targets.set(target, true);
                }
                _ => {}
            }
        }
    }

    targets
}

/// Cleans up VCpu resources for a VM that is being deleted.
///
/// Removes the VM's entry from the global `VM_VCPU_TASKS` map and then joins every vCPU task
/// that was registered for the VM. A warning is logged when the VM has no entry.
///
/// # Arguments
///
/// * `vm_id` - The ID of the VM whose VCpu resources should be cleaned up.
///
/// # Note
///
/// This should be called after all VCpu threads have exited to avoid resource leaks.
/// It will join all VCpu tasks to ensure they are fully cleaned up.
pub(crate) fn cleanup_vm_vcpus(vm_id: usize) {
    // Remove the entry in a statement of its own: the lock guard is a temporary that is
    // dropped at the end of this statement. Writing the call as an `if let` scrutinee would
    // keep the global spin lock held for the whole body, i.e. across every `join()` below,
    // while vCPU tasks themselves need that lock (via `get_vm_vcpus`) to make progress.
    let removed = VM_VCPU_TASKS.lock().remove(&vm_id);
    let Some(vm_vcpus) = removed else {
        warn!("VM[{}] VCpu resources not found in queue", vm_id);
        return;
    };

    // Take task references out before joining so we never block while
    // holding the per-VM task-list lock.
    let tasks: Vec<_> = vm_vcpus.vcpu_task_list.lock().values().cloned().collect();
    let task_count = tasks.len();

    info!("VM[{}] Joining {} VCpu tasks...", vm_id, task_count);

    // Join all VCpu tasks to ensure they have fully exited and cleaned up
    for (idx, task) in tasks.iter().enumerate() {
        debug!(
            "VM[{}] Joining VCpu task[{}]: {}",
            vm_id,
            idx,
            task.id_name()
        );
        let exit_code = task.join();
        debug!(
            "VM[{}] VCpu task[{}] exited with code: {}",
            vm_id, idx, exit_code
        );
    }

    info!(
        "VM[{}] VCpu resources cleaned up, {} VCpu tasks joined successfully",
        vm_id, task_count
    );
}

/// Bumps the running-vCPU counter of the VM identified by `vm_id`.
///
/// Does nothing when the VM has no registered vCPU resources.
fn mark_vcpu_running(vm_id: usize) {
    let Some(vm_vcpus) = get_vm_vcpus(vm_id) else {
        return;
    };
    vm_vcpus.mark_vcpu_running();
}

/// Boots a secondary vCPU of a VM.
///
/// The vCPU must be in the `Free` state. Its entry point and boot argument are programmed,
/// then a task is spawned for it and registered in the VM's vCPU task list.
///
/// # Arguments
///
/// * `vm` - The VM on which the vCPU is to be booted.
/// * `vcpu_id` - The ID of the vCPU to be booted (used as an index into the VM's vCPU list).
/// * `entry_point` - The guest-physical entry point of the vCPU.
/// * `arg` - The argument handed to the vCPU: general-purpose register 0 on non-RISC-V
///   targets; on RISC-V `a0` receives `vcpu_id` and `a1` receives `arg`.
///
/// # Errors
///
/// * `NotFound` - the vCPU does not exist, or the VM has no registered vCPU resources.
/// * `BadState` - the vCPU is not in the `Free` state.
/// * Any error returned by `set_entry`.
fn vcpu_on(vm: VMRef, vcpu_id: usize, entry_point: GuestPhysAddr, arg: usize) -> AxResult {
    let Some(vcpu) = vm.vcpu_list().get(vcpu_id).cloned() else {
        return Err(ax_err_type!(
            NotFound,
            format!("vCPU {vcpu_id} not found")
        ));
    };

    // Only a vCPU that has never been started may be booted.
    if vcpu.state() != VCpuState::Free {
        return Err(ax_err_type!(
            BadState,
            format!("vCPU {} invalid state {:?}", vcpu.id(), vcpu.state())
        ));
    }

    vcpu.set_entry(entry_point)?;

    #[cfg(not(target_arch = "riscv64"))]
    vcpu.set_gpr(0, arg);

    #[cfg(target_arch = "riscv64")]
    {
        info!(
            "vcpu_on: vcpu[{}] entry={:x} opaque={:x}",
            vcpu_id, entry_point, arg
        );
        vcpu.set_gpr(RiscvGprIndex::A0 as usize, vcpu_id);
        vcpu.set_gpr(RiscvGprIndex::A1 as usize, arg);
    }

    let Some(vm_vcpus) = get_vm_vcpus(vm.id()) else {
        return Err(ax_err_type!(
            NotFound,
            format!("VM[{}] vCPU resources not found", vm.id())
        ));
    };

    let task = alloc_vcpu_task(&vm, vcpu);
    vm_vcpus.add_vcpu_task(vcpu_id, task);
    Ok(())
}

/// Sets up the primary VCpu for the given VM,
/// generally the first VCpu in the VCpu list,
/// and initializing their respective wait queues and task lists.
/// VM's secondary VCpus are not started at this point.
///
/// # Arguments
///
/// * `vm` - A reference to the VM for which the VCpus are being set up.
pub fn setup_vm_primary_vcpu(vm: VMRef) {
    info!("Initializing VM[{}]'s {} vcpus", vm.id(), vm.vcpu_num());
    let vm_id = vm.id();
    let primary_vcpu_id = 0;

    let Some(primary_vcpu) = vm.vcpu_list().get(primary_vcpu_id).cloned() else {
        warn!("VM[{vm_id}] has no primary vCPU");
        return;
    };
    let vm_vcpus = Arc::new(VMVCpus::new(vm.clone()));
    {
        let mut vm_vcpu_tasks = VM_VCPU_TASKS.lock();
        if vm_vcpu_tasks.contains_key(&vm_id) {
            debug!("VM[{vm_id}] vCPU resources already exist");
            return;
        }
        vm_vcpu_tasks.insert(vm_id, vm_vcpus.clone());
    }

    let primary_vcpu_task = alloc_vcpu_task(&vm, primary_vcpu);
    vm_vcpus.add_vcpu_task(0, primary_vcpu_task);
}

/// Creates and spawns the ArceOS task that drives a vCPU.
///
/// The task's entry function is [`vcpu_run()`]. When the vCPU has a dedicated physical CPU
/// set, it is applied as the task's CPU mask. The VM and vCPU are attached to the task
/// through its task extension.
///
/// # Arguments
///
/// * `vm` - The VM that owns the vCPU.
/// * `vcpu` - The vCPU for which the task is being allocated.
///
/// # Returns
///
/// The handle returned by `spawn_task` for the new task.
///
/// # Note
///
/// * The task is created with a kernel stack of `KERNEL_STACK_SIZE` bytes (256 KiB).
/// * [`vcpu_run()`] blocks on the VM's vCPU wait queue until the VM is running, so the guest
///   does not start executing until the task is woken (e.g. by `notify_primary_vcpu()`).
fn alloc_vcpu_task(vm: &VMRef, vcpu: VCpuRef) -> crate::AxTaskRef {
    info!("Spawning task for VM[{}] VCpu[{}]", vm.id(), vcpu.id());

    let task_name = format!("VM[{}]-VCpu[{}]", vm.id(), vcpu.id());
    let mut task = crate::TaskInner::new(vcpu_run, task_name, KERNEL_STACK_SIZE);

    // Pin the task only when the vCPU is configured with a physical CPU set.
    if let Some(cpu_bits) = vcpu.phys_cpu_set() {
        task.set_cpumask(crate::host::task::cpu_mask_from_raw_bits(cpu_bits));
    }

    // Use Weak reference in TaskExt to avoid keeping VM alive
    *task.task_ext_mut() = Some(crate::AxTaskExt::from_impl(VCpuTask::new(vm, vcpu)));

    info!(
        "VCpu task {} created {:?}",
        task.id_name(),
        task.cpumask()
    );
    crate::host::task::spawn_task(task)
}

/// The main routine for VCpu task.
/// This function is the entry point for the VCpu tasks, which are spawned for each VCpu of a VM.
///
/// When the VCpu first starts running, it waits for the VM to be in the running state.
/// It then enters a loop where it runs the VCpu and handles the various exit reasons.
///
/// Each loop iteration:
///
/// 1. injects interrupts queued for this vCPU,
/// 2. runs the vCPU via `vm.run_vcpu` and dispatches on the exit reason,
/// 3. blocks while the VM is suspending, and
/// 4. leaves the loop when the VM is stopping. The last vCPU to leave marks the VM as
///    `Stopped`, decrements the running-VM count and wakes one waiter on `super::VMM`.
///
/// Exit handlers that `continue` (unknown CPU-up target, IPI without target, x86 halt) skip
/// steps 3 and 4 for that iteration.
///
/// The VM and vCPU are taken from the current task's vCPU task extension. The function
/// returns immediately if the VM has no registered vCPU resources.
fn vcpu_run() {
    let curr = crate::host::task::current_task();

    let vm = curr.as_vcpu_task().vm();
    let vcpu = curr.as_vcpu_task().vcpu.clone();
    let vm_id = vm.id();
    let vcpu_id = vcpu.id();
    let Some(vm_vcpus) = get_vm_vcpus(vm_id) else {
        warn!("VM[{vm_id}] vCPU resources not found, VCpu[{vcpu_id}] exiting");
        return;
    };

    // Park on the VM's vCPU wait queue until the VM reports that it is running.
    info!("VM[{}] VCpu[{}] waiting for running", vm.id(), vcpu.id());
    wait_for(&vm_vcpus, || vm.running());

    info!("VM[{}] VCpu[{}] running...", vm.id(), vcpu.id());
    #[cfg(target_arch = "x86_64")]
    super::x86_irq::enable_ioapic_irq_forwarding(&vm, &vcpu);
    // Counted here and un-counted by `mark_vcpu_exiting` on the stop path below.
    mark_vcpu_running(vm_id);

    loop {
        // Deliver interrupts that other tasks queued for this vCPU before entering the guest.
        inject_pending_interrupts(vm_id, vcpu_id, &vcpu);

        #[cfg(target_arch = "x86_64")]
        super::x86_irq::drain_pending_ioapic_irqs(&vm, &vcpu);

        match vm.run_vcpu(vcpu_id) {
            Ok(exit_reason) => match exit_reason {
                AxVCpuExitReason::Hypercall { nr, args } => {
                    debug!("Hypercall [{nr}] args {args:x?}");
                    use crate::runtime::hvc::HyperCall;

                    match HyperCall::new(vm.clone(), nr, args) {
                        Ok(hypercall) => {
                            // A failed hypercall is reported to the guest as -1.
                            let ret_val = match hypercall.execute() {
                                Ok(ret_val) => ret_val as isize,
                                Err(err) => {
                                    warn!("Hypercall [{nr:#x}] failed: {err:?}");
                                    -1
                                }
                            };
                            vcpu.set_return_value(ret_val as usize);
                        }
                        // NOTE(review): when the hypercall cannot even be constructed, no return
                        // value is written to the guest — confirm this is intended.
                        Err(err) => {
                            warn!("Hypercall [{nr:#x}] failed: {err:?}");
                        }
                    }
                }
                AxVCpuExitReason::FailEntry {
                    hardware_entry_failure_reason,
                } => {
                    warn!(
                        "VM[{vm_id}] VCpu[{vcpu_id}] run failed with exit code \
                         {hardware_entry_failure_reason}"
                    );
                }
                AxVCpuExitReason::ExternalInterrupt { vector } => {
                    debug!("VM[{vm_id}] run VCpu[{vcpu_id}] get irq {vector}");

                    // The host IRQ is dispatched here on every architecture except aarch64.
                    // TODO: maybe move this irq dispatcher to lower layer to accelerate the interrupt handling
                    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
                    crate::host::arceos::dispatch_host_irq(vector as usize);
                    #[cfg(target_arch = "riscv64")]
                    vcpu.with_current_cpu_set(|| {
                        crate::host::arceos::dispatch_host_irq(vector as usize);
                        vcpu.get_arch_vcpu().latch_hvip_from_hw();
                    });
                    crate::check_timer_events();
                    #[cfg(target_arch = "x86_64")]
                    super::x86_irq::forward_passthrough_irq_from_vmexit(
                        &vm,
                        &vcpu,
                        vector as usize,
                    );
                    #[cfg(target_arch = "x86_64")]
                    super::x86_irq::inject_pending_serial_irq(&vm, &vcpu);
                }
                AxVCpuExitReason::PreemptionTimer => {
                    crate::timer::check_events();
                    #[cfg(target_arch = "x86_64")]
                    super::x86_irq::inject_due_pit_irq0(&vm, &vcpu);
                    #[cfg(target_arch = "x86_64")]
                    super::x86_irq::inject_pending_serial_irq(&vm, &vcpu);
                }
                AxVCpuExitReason::InterruptEnd { vector: _vector } => {
                    // Only x86_64 acts on this exit; elsewhere the vector is unused.
                    #[cfg(target_arch = "x86_64")]
                    if let Some(vector) = _vector {
                        super::x86_irq::inject_pending_ioapic_irq_after_eoi(&vm, &vcpu, vector);
                    }
                }
                AxVCpuExitReason::Halt => {
                    debug!("VM[{vm_id}] run VCpu[{vcpu_id}] Halt");
                    // x86_64 re-enters the guest right away (skipping the suspend/stop checks
                    // below); other architectures block until the wait queue is notified.
                    #[cfg(target_arch = "x86_64")]
                    super::x86_irq::inject_pending_serial_irq(&vm, &vcpu);
                    #[cfg(target_arch = "x86_64")]
                    continue;
                    #[cfg(not(target_arch = "x86_64"))]
                    wait(&vm_vcpus)
                }
                AxVCpuExitReason::Nothing => {}
                AxVCpuExitReason::CpuDown { _state } => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] CpuDown state {_state:#x}");
                    wait(&vm_vcpus)
                }
                AxVCpuExitReason::CpuUp {
                    target_cpu,
                    entry_point,
                    arg,
                } => {
                    info!(
                        "VM[{vm_id}]'s VCpu[{vcpu_id}] try to boot target_cpu [{target_cpu}] \
                         entry_point={entry_point:x} arg={arg:#x}"
                    );

                    // Get the mapping relationship between all vCPUs and physical CPUs from the configuration
                    let vcpu_mappings = vm.get_vcpu_affinities_pcpu_ids();

                    // Find the vCPU ID corresponding to the physical ID
                    let Some(target_vcpu_id) =
                        vcpu_mappings.iter().find_map(|(vcpu_id, _, phys_id)| {
                            (*phys_id == target_cpu as usize).then_some(*vcpu_id)
                        })
                    else {
                        // Unknown target: report failure to the guest and re-enter it.
                        warn!("Physical CPU ID {target_cpu} not found in VM configuration");
                        vcpu.set_return_value(usize::MAX);
                        continue;
                    };

                    match vcpu_on(vm.clone(), target_vcpu_id, entry_point, arg as _) {
                        Ok(()) => {
                            // Success is reported to the calling vCPU as 0.
                            #[cfg(not(target_arch = "riscv64"))]
                            vcpu.set_gpr(0, 0);
                            #[cfg(target_arch = "riscv64")]
                            vcpu.set_gpr(RiscvGprIndex::A0 as usize, 0);
                        }
                        Err(err) => {
                            warn!("Failed to boot VM[{vm_id}] VCpu[{target_vcpu_id}]: {err:?}");
                            vcpu.set_return_value(usize::MAX);
                        }
                    }
                }
                AxVCpuExitReason::SystemDown => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] SystemDown");
                    if let Err(err) = vm.shutdown() {
                        warn!("VM[{vm_id}] shutdown failed: {err:?}");
                    }
                    // Notify all vCPUs to wake up to check the shutdown flag
                    notify_all_vcpus(vm_id);
                }
                AxVCpuExitReason::SendIPI {
                    target_cpu,
                    target_cpu_aux,
                    send_to_all,
                    send_to_self,
                    vector,
                } => {
                    debug!(
                        "VM[{vm_id}] run VCpu[{vcpu_id}] SendIPI, target_cpu={target_cpu:#x}, \
                         target_cpu_aux={target_cpu_aux:#x}, vector={vector}",
                    );
                    let targets = ipi_targets(
                        &vm,
                        vcpu_id,
                        target_cpu,
                        target_cpu_aux,
                        send_to_all,
                        send_to_self,
                    );
                    if targets.is_empty() {
                        warn!(
                            "VM[{vm_id}] SendIPI has no target: target_cpu={target_cpu:#x}, \
                             target_cpu_aux={target_cpu_aux:#x}"
                        );
                        continue;
                    }

                    // A self-targeted IPI is injected directly into the current vCPU.
                    // NOTE(review): this `expect` panics the vCPU task if injection fails, while
                    // the remote path below only warns — confirm failure here is a true invariant
                    // violation.
                    if targets.get(vcpu_id) {
                        crate::inject_current_vcpu_interrupt(vector as _)
                            .expect("failed to inject self IPI into current vCPU");
                    }
                    // Everything except the sender is handed to the VM for delivery.
                    let mut remote_targets = targets;
                    remote_targets.set(vcpu_id, false);
                    if !remote_targets.is_empty()
                        && let Err(err) = vm.inject_interrupt_to_vcpu(remote_targets, vector as _)
                    {
                        warn!(
                            "Failed to inject interrupt {vector} to VM[{vm_id}] targets \
                             {remote_targets:?}: {err:?}"
                        );
                    }
                }
                e => {
                    warn!("VM[{vm_id}] run VCpu[{vcpu_id}] unhandled vmexit: {e:?}");
                }
            },
            Err(err) => {
                // A failed run shuts the whole VM down.
                error!("VM[{vm_id}] run VCpu[{vcpu_id}] get error {err:?}");
                if let Err(err) = vm.shutdown() {
                    warn!("VM[{vm_id}] shutdown failed after vCPU error: {err:?}");
                }
                // Notify all vCPUs to wake up to check the shutdown flag
                notify_all_vcpus(vm_id);
            }
        }

        // Check if the VM is suspended
        if vm.suspending() {
            debug!(
                "VM[{}] VCpu[{}] is suspended, waiting for resume...",
                vm_id, vcpu_id
            );
            wait_for(&vm_vcpus, || !vm.suspending());
            info!("VM[{}] VCpu[{}] resumed from suspend", vm_id, vcpu_id);
            // NOTE(review): this `continue` re-enters the guest without the stop check below;
            // verify that a VM stopped while suspended is handled on the next iteration.
            continue;
        }

        // Check if the VM is stopping.
        if vm.stopping() {
            warn!(
                "VM[{}] VCpu[{}] stopping because of VM stopping",
                vm_id, vcpu_id
            );

            // `true` only for the vCPU that brings the running count from 1 to 0.
            if vm_vcpus.mark_vcpu_exiting() {
                info!("VM[{vm_id}] VCpu[{vcpu_id}] last VCpu exiting, decreasing running VM count");

                // Transition from Stopping to Stopped
                vm.set_vm_status(crate::VMStatus::Stopped);
                info!("VM[{}] state changed to Stopped", vm_id);

                #[cfg(target_arch = "x86_64")]
                super::x86_irq::disable_ioapic_irq_forwarding_for_vm(vm_id);

                sub_running_vm_count(1);
                crate::host::task::wait_queue_wake(&super::VMM, 1);
            }

            break;
        }
    }

    info!("VM[{}] VCpu[{}] exiting...", vm_id, vcpu_id);
}