Skip to main content

kvm_ioctls/ioctls/
vcpu.rs

1// Copyright © 2024 Institute of Software, CAS. All rights reserved.
2//
3// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4// SPDX-License-Identifier: Apache-2.0 OR MIT
5//
6// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the THIRD-PARTY file.
9
10// Part of public API
11#[cfg(target_arch = "x86_64")]
12pub use kvm_bindings::nested::KvmNestedStateBuffer;
13
14use kvm_bindings::*;
15use libc::EINVAL;
16use std::fs::File;
17use std::os::unix::io::{AsRawFd, RawFd};
18
19use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result};
20use crate::kvm_ioctls::*;
21use vmm_sys_util::errno;
22use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref};
23#[cfg(target_arch = "x86_64")]
24use {
25    std::num::NonZeroUsize,
26    vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr, ioctl_with_val},
27};
28
/// Derives the size of a register from its id.
///
/// The bits of `reg_id` selected by `KVM_REG_SIZE_MASK` (shifted down by
/// `KVM_REG_SIZE_SHIFT`) are treated as a power-of-two exponent; the returned
/// value is two raised to that exponent.
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
pub fn reg_size(reg_id: u64) -> usize {
    // Isolate the size field of the id and move it down to bit 0.
    let size_exponent = ((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT) as u32;
    2_usize.pow(size_exponent)
}
34
/// Information about a [`VcpuExit`] triggered by a hypercall (`KVM_EXIT_HYPERCALL`).
///
/// This is the payload carried by [`VcpuExit::Hypercall`].
#[derive(Debug)]
pub struct HypercallExit<'a> {
    /// The hypercall number.
    pub nr: u64,
    /// The arguments for the hypercall.
    pub args: [u64; 6],
    /// The return code to be indicated to the guest. This is a mutable
    /// reference so that the user can fill it in.
    pub ret: &'a mut u64,
    /// Whether the hypercall was executed in long mode.
    pub longmode: u32,
}
47
/// Information about a [`VcpuExit`] triggered by an MSR read (`KVM_EXIT_X86_RDMSR`).
///
/// This is the payload carried by [`VcpuExit::X86Rdmsr`].
#[derive(Debug)]
pub struct ReadMsrExit<'a> {
    /// Must be set to 1 by the user if the read access should fail. This
    /// will inject a #GP fault into the guest when the VCPU is executed
    /// again.
    pub error: &'a mut u8,
    /// The reason for this exit.
    pub reason: MsrExitReason,
    /// The MSR the guest wants to read.
    pub index: u32,
    /// The data to be supplied by the user as the MSR contents to the guest.
    pub data: &'a mut u64,
}
62
/// Information about a [`VcpuExit`] triggered by an MSR write (`KVM_EXIT_X86_WRMSR`).
///
/// This is the payload carried by [`VcpuExit::X86Wrmsr`].
#[derive(Debug)]
pub struct WriteMsrExit<'a> {
    /// Must be set to 1 by the user if the write access should fail. This
    /// will inject a #GP fault into the guest when the VCPU is executed
    /// again.
    pub error: &'a mut u8,
    /// The reason for this exit.
    pub reason: MsrExitReason,
    /// The MSR the guest wants to write.
    pub index: u32,
    /// The data the guest wants to write into the MSR.
    pub data: u64,
}
77
bitflags::bitflags! {
    /// The reason for a [`VcpuExit::X86Rdmsr`] or [`VcpuExit::X86Wrmsr`]. This
    /// is also used when enabling
    /// [`Cap::X86UserSpaceMsr`](crate::Cap::X86UserSpaceMsr) to specify which
    /// reasons should be forwarded to the user via those exits.
    ///
    /// Being a bitflags type, several reasons can be combined in one value.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct MsrExitReason: u32 {
        /// Corresponds to [`KVM_MSR_EXIT_REASON_UNKNOWN`]. The exit was
        /// triggered by an access to an MSR that is unknown to KVM.
        const Unknown = KVM_MSR_EXIT_REASON_UNKNOWN;
        /// Corresponds to [`KVM_MSR_EXIT_REASON_INVAL`]. The exit was
        /// triggered by an access to an invalid MSR or to reserved bits.
        const Inval = KVM_MSR_EXIT_REASON_INVAL;
        /// Corresponds to [`KVM_MSR_EXIT_REASON_FILTER`]. The exit was
        /// triggered by an access to a filtered MSR.
        const Filter = KVM_MSR_EXIT_REASON_FILTER;
    }
}
96
/// Reasons for vCPU exits.
///
/// The exit reasons are mapped to the `KVM_EXIT_*` defines in the
/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h).
///
/// Variants that carry references are tied to the lifetime `'a`; mutable
/// references are the ones the user is expected to fill in.
#[derive(Debug)]
pub enum VcpuExit<'a> {
    /// An out port instruction was run on the given port with the given data.
    IoOut(u16 /* port */, &'a [u8] /* data */),
    /// An in port instruction was run on the given port.
    ///
    /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run)
    /// is called again.
    IoIn(u16 /* port */, &'a mut [u8] /* data */),
    /// A read instruction was run against the given MMIO address.
    ///
    /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run)
    /// is called again.
    MmioRead(u64 /* address */, &'a mut [u8]),
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite(u64 /* address */, &'a [u8]),
    /// Corresponds to `KVM_EXIT_UNKNOWN`.
    Unknown,
    /// Corresponds to `KVM_EXIT_EXCEPTION`.
    Exception,
    /// Corresponds to `KVM_EXIT_HYPERCALL`.
    ///
    /// See [`HypercallExit`] for the carried information.
    Hypercall(HypercallExit<'a>),
    /// Corresponds to `KVM_EXIT_DEBUG`.
    ///
    /// Provides architecture specific information for the debug event.
    Debug(kvm_debug_exit_arch),
    /// Corresponds to `KVM_EXIT_HLT`.
    Hlt,
    /// Corresponds to `KVM_EXIT_IRQ_WINDOW_OPEN`.
    IrqWindowOpen,
    /// Corresponds to `KVM_EXIT_SHUTDOWN`.
    Shutdown,
    /// Corresponds to `KVM_EXIT_FAIL_ENTRY`.
    ///
    /// Carries the hardware entry failure reason and the cpu, in that order.
    FailEntry(
        u64, /* hardware_entry_failure_reason */
        u32, /* cpu */
    ),
    /// Corresponds to `KVM_EXIT_INTR`.
    Intr,
    /// Corresponds to `KVM_EXIT_SET_TPR`.
    SetTpr,
    /// Corresponds to `KVM_EXIT_TPR_ACCESS`.
    TprAccess,
    /// Corresponds to `KVM_EXIT_S390_SIEIC`.
    S390Sieic,
    /// Corresponds to `KVM_EXIT_S390_RESET`.
    S390Reset,
    /// Corresponds to `KVM_EXIT_DCR`.
    Dcr,
    /// Corresponds to `KVM_EXIT_NMI`.
    Nmi,
    /// Corresponds to `KVM_EXIT_INTERNAL_ERROR`.
    InternalError,
    /// Corresponds to `KVM_EXIT_OSI`.
    Osi,
    /// Corresponds to `KVM_EXIT_PAPR_HCALL`.
    PaprHcall,
    /// Corresponds to `KVM_EXIT_S390_UCONTROL`.
    S390Ucontrol,
    /// Corresponds to `KVM_EXIT_WATCHDOG`.
    Watchdog,
    /// Corresponds to `KVM_EXIT_S390_TSCH`.
    S390Tsch,
    /// Corresponds to `KVM_EXIT_EPR`.
    Epr,
    /// Corresponds to `KVM_EXIT_SYSTEM_EVENT`.
    ///
    /// Carries the event type and the associated data, in that order.
    SystemEvent(u32 /* type */, &'a [u64] /* data */),
    /// Corresponds to `KVM_EXIT_S390_STSI`.
    S390Stsi,
    /// Corresponds to `KVM_EXIT_IOAPIC_EOI`.
    ///
    /// Carries the interrupt vector.
    IoapicEoi(u8 /* vector */),
    /// Corresponds to `KVM_EXIT_HYPERV`.
    Hyperv,
    /// Corresponds to `KVM_EXIT_X86_RDMSR`.
    ///
    /// See [`ReadMsrExit`] for the carried information.
    X86Rdmsr(ReadMsrExit<'a>),
    /// Corresponds to `KVM_EXIT_X86_WRMSR`.
    ///
    /// See [`WriteMsrExit`] for the carried information.
    X86Wrmsr(WriteMsrExit<'a>),
    /// Corresponds to `KVM_EXIT_MEMORY_FAULT`.
    MemoryFault {
        /// The `flags` value reported with the exit.
        flags: u64,
        /// The `gpa` value reported with the exit (presumably the guest
        /// physical address of the fault; confirm against the KVM API doc).
        gpa: u64,
        /// The `size` value reported with the exit (presumably the size of
        /// the faulting range; confirm against the KVM API doc).
        size: u64,
    },
    /// Corresponds to an exit reason that is unknown from the current version
    /// of the kvm-ioctls crate. Let the consumer decide about what to do with
    /// it.
    ///
    /// Carries the raw exit reason value.
    Unsupported(u32),
}
192
/// Wrapper over KVM vCPU ioctls.
///
/// The methods of this type issue ioctls against the wrapped vCPU file.
#[derive(Debug)]
pub struct VcpuFd {
    /// The file for the vCPU; the SAFETY notes of the ioctl wrappers rely on
    /// it being a vCPU fd.
    vcpu: File,
    /// Wrapper of type `KvmRunWrapper` (presumably around the vCPU's
    /// `kvm_run` region; confirm against the `KvmRunWrapper` definition).
    kvm_run_ptr: KvmRunWrapper,
    /// A pointer to the coalesced MMIO page, if there is one.
    coalesced_mmio_ring: Option<KvmCoalescedIoRing>,
}
201
/// KVM Sync Registers used to tell KVM which registers to sync.
///
/// Each variant's discriminant is the matching `KVM_SYNC_X86_*` constant.
#[repr(u32)]
#[derive(Debug, Copy, Clone)]
#[cfg(target_arch = "x86_64")]
pub enum SyncReg {
    /// General purpose registers (`KVM_SYNC_X86_REGS`).
    Register = KVM_SYNC_X86_REGS,

    /// System registers (`KVM_SYNC_X86_SREGS`).
    SystemRegister = KVM_SYNC_X86_SREGS,

    /// CPU events (`KVM_SYNC_X86_EVENTS`).
    VcpuEvents = KVM_SYNC_X86_EVENTS,
}
216
217impl VcpuFd {
218    /// Returns the vCPU general purpose registers.
219    ///
220    /// The registers are returned in a `kvm_regs` structure as defined in the
221    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
222    /// See documentation for `KVM_GET_REGS`.
223    ///
224    /// # Example
225    ///
226    /// ```rust
227    /// # use kvm_ioctls::Kvm;
228    /// let kvm = Kvm::new().unwrap();
229    /// let vm = kvm.create_vm().unwrap();
230    /// let vcpu = vm.create_vcpu(0).unwrap();
231    /// let regs = vcpu.get_regs().unwrap();
232    /// ```
233    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
234    pub fn get_regs(&self) -> Result<kvm_regs> {
235        let mut regs = kvm_regs::default();
236        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
237        // read the correct amount of memory from our pointer, and we verify the return result.
238        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
239        if ret != 0 {
240            return Err(errno::Error::last());
241        }
242        Ok(regs)
243    }
244
245    /// Sets a specified piece of cpu configuration and/or state.
246    ///
247    /// See the documentation for `KVM_SET_DEVICE_ATTR` in
248    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt)
249    /// # Arguments
250    ///
251    /// * `device_attr` - The cpu attribute to be set.
252    ///
253    /// # Example
254    ///
255    /// ```rust
256    /// # use kvm_ioctls::Kvm;
257    /// # use kvm_bindings::{
258    ///    KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT
259    /// };
260    /// let kvm = Kvm::new().unwrap();
261    /// let vm = kvm.create_vm().unwrap();
262    /// let vcpu = vm.create_vcpu(0).unwrap();
263    ///
264    /// let dist_attr = kvm_bindings::kvm_device_attr {
265    ///     group: KVM_ARM_VCPU_PMU_V3_CTRL,
266    ///     attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
267    ///     addr: 0x0,
268    ///     flags: 0,
269    /// };
270    ///
271    /// if (vcpu.has_device_attr(&dist_attr).is_ok()) {
272    ///     vcpu.set_device_attr(&dist_attr).unwrap();
273    /// }
274    /// ```
275    #[cfg(target_arch = "aarch64")]
276    pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> {
277        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
278        let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) };
279        if ret != 0 {
280            return Err(errno::Error::last());
281        }
282        Ok(())
283    }
284
285    /// Tests whether a cpu supports a particular attribute.
286    ///
287    /// See the documentation for `KVM_HAS_DEVICE_ATTR` in
288    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt)
289    /// # Arguments
290    ///
291    /// * `device_attr` - The cpu attribute to be tested. `addr` field is ignored.
292    ///
293    /// # Example
294    ///
295    /// ```rust
296    /// # use kvm_ioctls::Kvm;
297    /// # use kvm_bindings::{
298    ///    KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT
299    /// };
300    /// let kvm = Kvm::new().unwrap();
301    /// let vm = kvm.create_vm().unwrap();
302    /// let vcpu = vm.create_vcpu(0).unwrap();
303    ///
304    /// let dist_attr = kvm_bindings::kvm_device_attr {
305    ///     group: KVM_ARM_VCPU_PMU_V3_CTRL,
306    ///     attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
307    ///     addr: 0x0,
308    ///     flags: 0,
309    /// };
310    ///
311    /// vcpu.has_device_attr(&dist_attr);
312    /// ```
313    #[cfg(target_arch = "aarch64")]
314    pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> {
315        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
316        let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) };
317        if ret != 0 {
318            return Err(errno::Error::last());
319        }
320        Ok(())
321    }
322
323    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
324    ///
325    /// # Arguments
326    ///
327    /// * `regs` - general purpose registers. For details check the `kvm_regs` structure in the
328    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
329    ///
330    /// # Example
331    ///
332    /// ```rust
333    /// # use kvm_ioctls::Kvm;
334    /// let kvm = Kvm::new().unwrap();
335    /// let vm = kvm.create_vm().unwrap();
336    /// let vcpu = vm.create_vcpu(0).unwrap();
337    ///
338    /// // Get the current vCPU registers.
339    /// let mut regs = vcpu.get_regs().unwrap();
340    /// // Set a new value for the Instruction Pointer.
341    /// regs.rip = 0x100;
342    /// vcpu.set_regs(&regs).unwrap();
343    /// ```
344    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
345    pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
346        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
347        // read the correct amount of memory from our pointer, and we verify the return result.
348        let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
349        if ret != 0 {
350            return Err(errno::Error::last());
351        }
352        Ok(())
353    }
354
355    /// Returns the vCPU special registers.
356    ///
357    /// The registers are returned in a `kvm_sregs` structure as defined in the
358    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
359    /// See documentation for `KVM_GET_SREGS`.
360    ///
361    /// # Example
362    ///
363    /// ```rust
364    /// # use kvm_ioctls::Kvm;
365    /// let kvm = Kvm::new().unwrap();
366    /// let vm = kvm.create_vm().unwrap();
367    /// let vcpu = vm.create_vcpu(0).unwrap();
368    /// let sregs = vcpu.get_sregs().unwrap();
369    /// ```
370    #[cfg(target_arch = "x86_64")]
371    pub fn get_sregs(&self) -> Result<kvm_sregs> {
372        let mut regs = kvm_sregs::default();
373        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
374        // write the correct amount of memory to our pointer, and we verify the return result.
375        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
376        if ret != 0 {
377            return Err(errno::Error::last());
378        }
379        Ok(regs)
380    }
381
382    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
383    ///
384    /// # Arguments
385    ///
386    /// * `sregs` - Special registers. For details check the `kvm_sregs` structure in the
387    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
388    ///
389    /// # Example
390    ///
391    /// ```rust
392    /// # use kvm_ioctls::Kvm;
393    /// let kvm = Kvm::new().unwrap();
394    /// let vm = kvm.create_vm().unwrap();
395    /// let vcpu = vm.create_vcpu(0).unwrap();
396    ///
397    /// let mut sregs = vcpu.get_sregs().unwrap();
398    /// // Update the code segment (cs).
399    /// sregs.cs.base = 0;
400    /// sregs.cs.selector = 0;
401    /// vcpu.set_sregs(&sregs).unwrap();
402    /// ```
403    #[cfg(target_arch = "x86_64")]
404    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
405        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
406        // read the correct amount of memory from our pointer, and we verify the return result.
407        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
408        if ret != 0 {
409            return Err(errno::Error::last());
410        }
411        Ok(())
412    }
413
414    /// Returns the floating point state (FPU) from the vCPU.
415    ///
416    /// The state is returned in a `kvm_fpu` structure as defined in the
417    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
418    /// See the documentation for `KVM_GET_FPU`.
419    ///
420    /// # Example
421    ///
422    /// ```rust
423    /// # use kvm_ioctls::Kvm;
424    /// let kvm = Kvm::new().unwrap();
425    /// let vm = kvm.create_vm().unwrap();
426    /// let vcpu = vm.create_vcpu(0).unwrap();
427    /// let fpu = vcpu.get_fpu().unwrap();
428    /// ```
429    #[cfg(target_arch = "x86_64")]
430    pub fn get_fpu(&self) -> Result<kvm_fpu> {
431        let mut fpu = kvm_fpu::default();
432        // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct.
433        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) };
434        if ret != 0 {
435            return Err(errno::Error::last());
436        }
437        Ok(fpu)
438    }
439
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
441    ///
442    /// # Arguments
443    ///
444    /// * `fpu` - FPU configuration. For details check the `kvm_fpu` structure in the
445    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
446    ///
447    /// # Example
448    ///
449    /// ```rust
450    /// # use kvm_ioctls::Kvm;
451    /// # use kvm_bindings::kvm_fpu;
452    /// let kvm = Kvm::new().unwrap();
453    /// let vm = kvm.create_vm().unwrap();
454    /// let vcpu = vm.create_vcpu(0).unwrap();
455    ///
456    /// let KVM_FPU_CWD: u16 = 0x37f;
457    /// let fpu = kvm_fpu {
458    ///     fcw: KVM_FPU_CWD,
459    ///     ..Default::default()
460    /// };
461    /// vcpu.set_fpu(&fpu).unwrap();
462    /// ```
463    #[cfg(target_arch = "x86_64")]
464    pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
465        // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct.
466        let ret = unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) };
467        if ret < 0 {
468            return Err(errno::Error::last());
469        }
470        Ok(())
471    }
472
473    /// X86 specific call to setup the CPUID registers.
474    ///
475    /// See the documentation for `KVM_SET_CPUID2`.
476    ///
477    /// # Arguments
478    ///
479    /// * `cpuid` - CPUID registers.
480    ///
481    /// # Example
482    ///
483    ///  ```rust
484    /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES;
485    /// # use kvm_ioctls::Kvm;
486    /// let kvm = Kvm::new().unwrap();
487    /// let mut kvm_cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
488    /// let vm = kvm.create_vm().unwrap();
489    /// let vcpu = vm.create_vcpu(0).unwrap();
490    ///
491    /// // Update the CPUID entries to disable the EPB feature.
492    /// const ECX_EPB_SHIFT: u32 = 3;
493    /// let entries = kvm_cpuid.as_mut_slice();
494    /// for entry in entries.iter_mut() {
495    ///     match entry.function {
496    ///         6 => entry.ecx &= !(1 << ECX_EPB_SHIFT),
497    ///         _ => (),
498    ///     }
499    /// }
500    ///
501    /// vcpu.set_cpuid2(&kvm_cpuid).unwrap();
502    /// ```
503    ///
504    #[cfg(target_arch = "x86_64")]
505    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
506        // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
507        let ret = unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) };
508        if ret < 0 {
509            return Err(errno::Error::last());
510        }
511        Ok(())
512    }
513
514    /// X86 specific call to retrieve the CPUID registers.
515    ///
516    /// It requires knowledge of how many `kvm_cpuid_entry2` entries there are to get.
517    /// See the documentation for `KVM_GET_CPUID2` in the
518    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
519    ///
520    /// # Arguments
521    ///
522    /// * `num_entries` - Number of CPUID entries to be read.
523    ///
524    /// # Example
525    ///
526    ///  ```rust
527    /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES;
528    /// # use kvm_ioctls::Kvm;
529    /// let kvm = Kvm::new().unwrap();
530    /// let vm = kvm.create_vm().unwrap();
531    /// let vcpu = vm.create_vcpu(0).unwrap();
532    /// let cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES).unwrap();
533    /// ```
534    ///
535    #[cfg(target_arch = "x86_64")]
536    pub fn get_cpuid2(&self, num_entries: usize) -> Result<CpuId> {
537        if num_entries > KVM_MAX_CPUID_ENTRIES {
538            // Returns the same error the underlying `ioctl` would have sent.
539            return Err(errno::Error::new(libc::ENOMEM));
540        }
541
542        let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?;
543        let ret =
544            // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
545            unsafe { ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) };
546        if ret != 0 {
547            return Err(errno::Error::last());
548        }
549        Ok(cpuid)
550    }
551
    /// Enables the capability described by `cap` on this vCPU using the `KVM_ENABLE_CAP` ioctl.
    ///
553    /// See the documentation for `KVM_ENABLE_CAP`.
554    ///
555    /// # Arguments
556    ///
    /// * `cap` - KVM capability structure. For details check the `kvm_enable_cap`
558    ///   structure in the
559    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
560    ///
561    /// # Example
562    ///
563    ///  ```rust
564    /// # use kvm_bindings::{kvm_enable_cap, KVM_MAX_CPUID_ENTRIES, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP};
565    /// # use kvm_ioctls::{Kvm, Cap};
566    /// let kvm = Kvm::new().unwrap();
567    /// let vm = kvm.create_vm().unwrap();
568    /// let mut cap: kvm_enable_cap = Default::default();
569    /// // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled
570    /// cap.cap = KVM_CAP_SPLIT_IRQCHIP;
571    /// cap.args[0] = 24;
572    /// vm.enable_cap(&cap).unwrap();
573    ///
574    /// let vcpu = vm.create_vcpu(0).unwrap();
575    /// if kvm.check_extension(Cap::HypervSynic) {
576    ///     let mut cap: kvm_enable_cap = Default::default();
577    ///     cap.cap = KVM_CAP_HYPERV_SYNIC;
578    ///     vcpu.enable_cap(&cap).unwrap();
579    /// }
580    /// ```
581    ///
582    #[cfg(target_arch = "x86_64")]
583    pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
584        // SAFETY: The ioctl is safe because we allocated the struct and we know the
585        // kernel will write exactly the size of the struct.
586        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) };
587        if ret == 0 {
588            Ok(())
589        } else {
590            Err(errno::Error::last())
591        }
592    }
593
594    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
595    ///
596    /// The state is returned in a `kvm_lapic_state` structure as defined in the
597    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
598    /// See the documentation for `KVM_GET_LAPIC`.
599    ///
600    /// # Example
601    ///
602    /// ```rust
603    /// # use kvm_ioctls::Kvm;
604    /// let kvm = Kvm::new().unwrap();
605    /// let vm = kvm.create_vm().unwrap();
606    /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU.
607    /// vm.create_irq_chip().unwrap();
608    /// let vcpu = vm.create_vcpu(0).unwrap();
609    /// let lapic = vcpu.get_lapic().unwrap();
610    /// ```
611    #[cfg(target_arch = "x86_64")]
612    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
613        let mut klapic = kvm_lapic_state::default();
614
615        // SAFETY: The ioctl is unsafe unless you trust the kernel not to write past the end of the
616        // local_apic struct.
617        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) };
618        if ret < 0 {
619            return Err(errno::Error::last());
620        }
621        Ok(klapic)
622    }
623
624    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
625    ///
626    /// See the documentation for `KVM_SET_LAPIC`.
627    ///
628    /// # Arguments
629    ///
630    /// * `klapic` - LAPIC state. For details check the `kvm_lapic_state` structure in the
631    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
632    /// # Example
633    ///
634    /// ```rust
635    /// # use kvm_ioctls::Kvm;
636    /// use std::io::Write;
637    ///
638    /// let kvm = Kvm::new().unwrap();
639    /// let vm = kvm.create_vm().unwrap();
640    /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU.
641    /// vm.create_irq_chip().unwrap();
642    /// let vcpu = vm.create_vcpu(0).unwrap();
643    /// let mut lapic = vcpu.get_lapic().unwrap();
644    ///
645    /// // Write to APIC_ICR offset the value 2.
646    /// let apic_icr_offset = 0x300;
647    /// let write_value: &[u8] = &[2, 0, 0, 0];
648    /// let mut apic_icr_slice =
649    ///     unsafe { &mut *(&mut lapic.regs[apic_icr_offset..] as *mut [i8] as *mut [u8]) };
650    /// apic_icr_slice.write(write_value).unwrap();
651    ///
652    /// // Update the value of LAPIC.
653    /// vcpu.set_lapic(&lapic).unwrap();
654    /// ```
655    #[cfg(target_arch = "x86_64")]
656    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
657        // SAFETY: The ioctl is safe because the kernel will only read from the klapic struct.
658        let ret = unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) };
659        if ret < 0 {
660            return Err(errno::Error::last());
661        }
662        Ok(())
663    }
664
665    /// Returns the model-specific registers (MSR) for this vCPU.
666    ///
667    /// It emulates `KVM_GET_MSRS` ioctl's behavior by returning the number of MSRs
668    /// successfully read upon success or the last error number in case of failure.
    /// The MSRs are returned in the `msrs` method argument.
670    ///
671    /// # Arguments
672    ///
673    /// * `msrs`  - MSRs (input/output). For details check the `kvm_msrs` structure in the
674    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
675    ///
676    /// # Example
677    ///
678    /// ```rust
679    /// # use kvm_ioctls::Kvm;
680    /// # use kvm_bindings::{kvm_msr_entry, Msrs};
681    /// let kvm = Kvm::new().unwrap();
682    /// let vm = kvm.create_vm().unwrap();
683    /// let vcpu = vm.create_vcpu(0).unwrap();
684    /// // Configure the struct to say which entries we want to get.
685    /// let mut msrs = Msrs::from_entries(&[
686    ///     kvm_msr_entry {
687    ///         index: 0x0000_0174,
688    ///         ..Default::default()
689    ///     },
690    ///     kvm_msr_entry {
691    ///         index: 0x0000_0175,
692    ///         ..Default::default()
693    ///     },
694    /// ])
695    /// .unwrap();
696    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
697    /// assert_eq!(read, 2);
698    /// ```
699    #[cfg(target_arch = "x86_64")]
700    pub fn get_msrs(&self, msrs: &mut Msrs) -> Result<usize> {
701        // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct.
702        let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) };
703        if ret < 0 {
704            return Err(errno::Error::last());
705        }
706        Ok(ret as usize)
707    }
708
709    /// Setup the model-specific registers (MSR) for this vCPU.
710    /// Returns the number of MSR entries actually written.
711    ///
712    /// See the documentation for `KVM_SET_MSRS`.
713    ///
714    /// # Arguments
715    ///
716    /// * `msrs` - MSRs. For details check the `kvm_msrs` structure in the
717    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
718    /// # Example
719    ///
720    /// ```rust
721    /// # use kvm_ioctls::Kvm;
722    /// # use kvm_bindings::{kvm_msr_entry, Msrs};
723    /// let kvm = Kvm::new().unwrap();
724    /// let vm = kvm.create_vm().unwrap();
725    /// let vcpu = vm.create_vcpu(0).unwrap();
726    ///
727    /// // Configure the entries we want to set.
728    /// let mut msrs = Msrs::from_entries(&[kvm_msr_entry {
729    ///     index: 0x0000_0174,
730    ///     ..Default::default()
731    /// }])
732    /// .unwrap();
733    /// let written = vcpu.set_msrs(&msrs).unwrap();
734    /// assert_eq!(written, 1);
735    /// ```
736    #[cfg(target_arch = "x86_64")]
737    pub fn set_msrs(&self, msrs: &Msrs) -> Result<usize> {
738        // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct.
739        let ret = unsafe { ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) };
740        // KVM_SET_MSRS actually returns the number of msr entries written.
741        if ret < 0 {
742            return Err(errno::Error::last());
743        }
744        Ok(ret as usize)
745    }
746
747    /// Returns the vcpu's current "multiprocessing state".
748    ///
749    /// See the documentation for `KVM_GET_MP_STATE` in the
750    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
751    ///
752    /// # Arguments
753    ///
754    /// * `kvm_mp_state` - multiprocessing state to be read.
755    ///
756    /// # Example
757    ///
758    /// ```rust
759    /// # use kvm_ioctls::Kvm;
760    /// let kvm = Kvm::new().unwrap();
761    /// let vm = kvm.create_vm().unwrap();
762    /// let vcpu = vm.create_vcpu(0).unwrap();
763    /// let mp_state = vcpu.get_mp_state().unwrap();
764    /// ```
765    #[cfg(any(
766        target_arch = "x86_64",
767        target_arch = "aarch64",
768        target_arch = "riscv64",
769        target_arch = "s390x"
770    ))]
771    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
772        let mut mp_state = Default::default();
773        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
774        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) };
775        if ret != 0 {
776            return Err(errno::Error::last());
777        }
778        Ok(mp_state)
779    }
780
781    /// Sets the vcpu's current "multiprocessing state".
782    ///
783    /// See the documentation for `KVM_SET_MP_STATE` in the
784    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
785    ///
786    /// # Arguments
787    ///
    /// * `mp_state` - multiprocessing state to be written.
789    ///
790    /// # Example
791    ///
792    /// ```rust
793    /// # use kvm_ioctls::Kvm;
794    /// let kvm = Kvm::new().unwrap();
795    /// let vm = kvm.create_vm().unwrap();
796    /// let vcpu = vm.create_vcpu(0).unwrap();
797    /// let mp_state = Default::default();
798    /// // Your `mp_state` manipulation here.
799    /// vcpu.set_mp_state(mp_state).unwrap();
800    /// ```
801    #[cfg(any(
802        target_arch = "x86_64",
803        target_arch = "aarch64",
804        target_arch = "riscv64",
805        target_arch = "s390x"
806    ))]
807    pub fn set_mp_state(&self, mp_state: kvm_mp_state) -> Result<()> {
808        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
809        let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) };
810        if ret != 0 {
811            return Err(errno::Error::last());
812        }
813        Ok(())
814    }
815
816    /// X86 specific call that returns the vcpu's current "xsave struct".
817    ///
818    /// See the documentation for `KVM_GET_XSAVE` in the
819    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
820    ///
821    /// # Arguments
822    ///
823    /// * `kvm_xsave` - xsave struct to be read.
824    ///
825    /// # Example
826    ///
827    /// ```rust
828    /// # use kvm_ioctls::Kvm;
829    /// let kvm = Kvm::new().unwrap();
830    /// let vm = kvm.create_vm().unwrap();
831    /// let vcpu = vm.create_vcpu(0).unwrap();
832    /// let xsave = vcpu.get_xsave().unwrap();
833    /// ```
834    #[cfg(target_arch = "x86_64")]
835    pub fn get_xsave(&self) -> Result<kvm_xsave> {
836        let mut xsave = Default::default();
837        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct.
838        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) };
839        if ret != 0 {
840            return Err(errno::Error::last());
841        }
842        Ok(xsave)
843    }
844
845    /// X86 specific call that gets the current vcpu's "xsave struct" via `KVM_GET_XSAVE2`.
846    ///
847    /// See the documentation for `KVM_GET_XSAVE2` in the
848    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
849    ///
850    /// # Arguments
851    ///
852    /// * `xsave` - A mutable reference to an [`Xsave`] instance that will be populated with the
853    ///   current vcpu's "xsave struct".
854    ///
855    /// # Safety
856    ///
857    /// This function is unsafe because there is no guarantee `xsave` is allocated with enough space
858    /// to hold the entire xsave state.
859    ///
860    /// The required size in bytes can be retrieved via `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` and
861    /// can vary depending on features that have been dynamically enabled by `arch_prctl()`. Thus,
862    /// any features must not be enabled dynamically after the required size has been confirmed.
863    ///
864    /// If `xsave` is not large enough, `KVM_GET_XSAVE2` copies data beyond the allocated area,
865    /// possibly causing undefined behavior.
866    ///
867    /// See the documentation for dynamically enabled XSTATE features in the
868    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
869    ///
870    /// # Example
871    ///
872    /// ```rust
873    /// # extern crate vmm_sys_util;
874    /// # use kvm_ioctls::{Kvm, Cap};
875    /// # use kvm_bindings::{Xsave, kvm_xsave, kvm_xsave2};
876    /// # use vmm_sys_util::fam::FamStruct;
877    /// let kvm = Kvm::new().unwrap();
878    /// let vm = kvm.create_vm().unwrap();
879    /// let vcpu = vm.create_vcpu(0).unwrap();
880    /// let xsave_size = vm.check_extension_int(Cap::Xsave2);
881    /// if xsave_size > 0 {
882    ///     let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
883    ///         .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
884    ///     let mut xsave = Xsave::new(fam_size).unwrap();
885    ///     unsafe { vcpu.get_xsave2(&mut xsave).unwrap() };
886    /// }
887    /// ```
888    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
889    pub unsafe fn get_xsave2(&self, xsave: &mut Xsave) -> Result<()> {
890        // SAFETY: Safe as long as `xsave` is allocated with enough space to hold the entire "xsave
891        // struct". That's why this function is unsafe.
892        let ret = unsafe {
893            ioctl_with_mut_ref(self, KVM_GET_XSAVE2(), &mut xsave.as_mut_fam_struct().xsave)
894        };
895        if ret != 0 {
896            return Err(errno::Error::last());
897        }
898        Ok(())
899    }
900
901    /// X86 specific call that sets the vcpu's current "xsave struct".
902    ///
903    /// See the documentation for `KVM_SET_XSAVE` in the
904    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
905    ///
906    /// # Arguments
907    ///
908    /// * `xsave` - xsave struct to be written.
909    ///
910    /// # Safety
911    ///
912    /// The C `kvm_xsave` struct was extended to have a flexible array member (FAM) at the end in
913    /// Linux 5.17. The size can vary depending on features that have been dynamically enabled via
914    /// `arch_prctl()` and the required size can be retrieved via
915    /// `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)`. That means `KVM_SET_XSAVE` may copy data beyond the
916    /// size of the traditional C `kvm_xsave` struct (i.e. 4096 bytes) now.
917    ///
918    /// It is safe if used on Linux prior to 5.17, if no XSTATE features are enabled dynamically or
919    /// if the required size is still within the traditional 4096 bytes even with dynamically
920    /// enabled features. However, if any features are enabled dynamically, it is recommended to use
921    /// `set_xsave2()` instead.
922    ///
923    /// See the documentation for dynamically enabled XSTATE features in the
924    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
925    ///
926    /// Theoretically, it can be made safe by checking which features are enabled in the bit vector
927    /// of the XSTATE header and validating the required size is less than or equal to 4096 bytes.
928    /// However, to do it properly, we would need to extract the XSTATE header from the `kvm_xsave`
929    /// struct, check enabled features, retrieve the required size for each enabled feature (like
930    /// `setup_xstate_cache()` do in Linux) and calculate the total size.
931    ///
932    /// # Example
933    ///
934    /// ```rust
935    /// # use kvm_ioctls::Kvm;
936    /// let kvm = Kvm::new().unwrap();
937    /// let vm = kvm.create_vm().unwrap();
938    /// let vcpu = vm.create_vcpu(0).unwrap();
939    /// let xsave = Default::default();
940    /// // Your `xsave` manipulation here.
941    /// unsafe { vcpu.set_xsave(&xsave).unwrap() };
942    /// ```
943    #[cfg(target_arch = "x86_64")]
944    pub unsafe fn set_xsave(&self, xsave: &kvm_xsave) -> Result<()> {
945        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct.
946        let ret = unsafe { ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) };
947        if ret != 0 {
948            return Err(errno::Error::last());
949        }
950        Ok(())
951    }
952
953    /// Convenience function for doing `KVM_SET_XSAVE` with the FAM-enabled [`Xsave`]
954    /// instead of the pre-5.17 plain [`kvm_xsave`].
955    ///
956    /// # Arguments
957    ///
958    /// * `xsave` - A reference to an [`Xsave`] instance to be set.
959    ///
960    /// # Safety
961    ///
962    /// This function is unsafe because there is no guarantee `xsave` is properly allocated with
963    /// the size that KVM assumes.
964    ///
965    /// The required size in bytes can be retrieved via `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` and
966    /// can vary depending on features that have been dynamically enabled by `arch_prctl()`. Thus,
967    /// any features must not be enabled after the required size has been confirmed.
968    ///
969    /// If `xsave` is not large enough, `KVM_SET_XSAVE` copies data beyond the allocated area to
970    /// the kernel, possibly causing undefined behavior.
971    ///
972    /// See the documentation for dynamically enabled XSTATE features in the
973    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
974    ///
975    /// # Example
976    ///
977    /// ```rust
978    /// # extern crate vmm_sys_util;
979    /// # use kvm_ioctls::{Kvm, Cap};
980    /// # use kvm_bindings::{Xsave, kvm_xsave, kvm_xsave2};
981    /// # use vmm_sys_util::fam::FamStruct;
982    /// let kvm = Kvm::new().unwrap();
983    /// let vm = kvm.create_vm().unwrap();
984    /// let vcpu = vm.create_vcpu(0).unwrap();
985    /// let xsave_size = vm.check_extension_int(Cap::Xsave2);
986    /// if xsave_size > 0 {
987    ///     let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
988    ///         .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
989    ///     let xsave = Xsave::new(fam_size).unwrap();
990    ///     // Your `xsave` manipulation here.
991    ///     unsafe { vcpu.set_xsave2(&xsave).unwrap() };
992    /// }
993    /// ```
994    #[cfg(target_arch = "x86_64")]
995    pub unsafe fn set_xsave2(&self, xsave: &Xsave) -> Result<()> {
996        // SAFETY: we trust the kernel and verified parameters
997        unsafe { self.set_xsave(&xsave.as_fam_struct_ref().xsave) }
998    }
999
1000    /// X86 specific call that returns the vcpu's current "xcrs".
1001    ///
1002    /// See the documentation for `KVM_GET_XCRS` in the
1003    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1004    ///
1005    /// # Arguments
1006    ///
1007    /// * `kvm_xcrs` - xcrs to be read.
1008    ///
1009    /// # Example
1010    ///
1011    /// ```rust
1012    /// # use kvm_ioctls::Kvm;
1013    /// let kvm = Kvm::new().unwrap();
1014    /// let vm = kvm.create_vm().unwrap();
1015    /// let vcpu = vm.create_vcpu(0).unwrap();
1016    /// let xcrs = vcpu.get_xcrs().unwrap();
1017    /// ```
1018    #[cfg(target_arch = "x86_64")]
1019    pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1020        let mut xcrs = Default::default();
1021        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1022        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) };
1023        if ret != 0 {
1024            return Err(errno::Error::last());
1025        }
1026        Ok(xcrs)
1027    }
1028
1029    /// X86 specific call that sets the vcpu's current "xcrs".
1030    ///
1031    /// See the documentation for `KVM_SET_XCRS` in the
1032    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1033    ///
1034    /// # Arguments
1035    ///
1036    /// * `kvm_xcrs` - xcrs to be written.
1037    ///
1038    /// # Example
1039    ///
1040    /// ```rust
1041    /// # use kvm_ioctls::Kvm;
1042    /// let kvm = Kvm::new().unwrap();
1043    /// let vm = kvm.create_vm().unwrap();
1044    /// let vcpu = vm.create_vcpu(0).unwrap();
1045    /// let xcrs = Default::default();
1046    /// // Your `xcrs` manipulation here.
1047    /// vcpu.set_xcrs(&xcrs).unwrap();
1048    /// ```
1049    #[cfg(target_arch = "x86_64")]
1050    pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
1051        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1052        let ret = unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) };
1053        if ret != 0 {
1054            return Err(errno::Error::last());
1055        }
1056        Ok(())
1057    }
1058
1059    /// X86 specific call that returns the vcpu's current "debug registers".
1060    ///
1061    /// See the documentation for `KVM_GET_DEBUGREGS` in the
1062    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1063    ///
1064    /// # Arguments
1065    ///
1066    /// * `kvm_debugregs` - debug registers to be read.
1067    ///
1068    /// # Example
1069    ///
1070    /// ```rust
1071    /// # use kvm_ioctls::Kvm;
1072    /// let kvm = Kvm::new().unwrap();
1073    /// let vm = kvm.create_vm().unwrap();
1074    /// let vcpu = vm.create_vcpu(0).unwrap();
1075    /// let debug_regs = vcpu.get_debug_regs().unwrap();
1076    /// ```
1077    #[cfg(target_arch = "x86_64")]
1078    pub fn get_debug_regs(&self) -> Result<kvm_debugregs> {
1079        let mut debug_regs = Default::default();
1080        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
1081        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) };
1082        if ret != 0 {
1083            return Err(errno::Error::last());
1084        }
1085        Ok(debug_regs)
1086    }
1087
1088    /// X86 specific call that sets the vcpu's current "debug registers".
1089    ///
1090    /// See the documentation for `KVM_SET_DEBUGREGS` in the
1091    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1092    ///
1093    /// # Arguments
1094    ///
1095    /// * `kvm_debugregs` - debug registers to be written.
1096    ///
1097    /// # Example
1098    ///
1099    /// ```rust
1100    /// # use kvm_ioctls::Kvm;
1101    /// let kvm = Kvm::new().unwrap();
1102    /// let vm = kvm.create_vm().unwrap();
1103    /// let vcpu = vm.create_vcpu(0).unwrap();
1104    /// let debug_regs = Default::default();
1105    /// // Your `debug_regs` manipulation here.
1106    /// vcpu.set_debug_regs(&debug_regs).unwrap();
1107    /// ```
1108    #[cfg(target_arch = "x86_64")]
1109    pub fn set_debug_regs(&self, debug_regs: &kvm_debugregs) -> Result<()> {
1110        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
1111        let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) };
1112        if ret != 0 {
1113            return Err(errno::Error::last());
1114        }
1115        Ok(())
1116    }
1117
1118    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1119    /// states of the vcpu.
1120    ///
1121    /// See the documentation for `KVM_GET_VCPU_EVENTS` in the
1122    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1123    ///
1124    /// # Arguments
1125    ///
1126    /// * `kvm_vcpu_events` - vcpu events to be read.
1127    ///
1128    /// # Example
1129    ///
1130    /// ```rust
1131    /// # use kvm_ioctls::{Kvm, Cap};
1132    /// let kvm = Kvm::new().unwrap();
1133    /// if kvm.check_extension(Cap::VcpuEvents) {
1134    ///     let vm = kvm.create_vm().unwrap();
1135    ///     let vcpu = vm.create_vcpu(0).unwrap();
1136    ///     // On arm64, vCPU needs to be initialized before accesing events
1137    ///     #[cfg(target_arch = "aarch64")]
1138    ///     {
1139    ///         let mut kvi = kvm_bindings::kvm_vcpu_init::default();
1140    ///         vm.get_preferred_target(&mut kvi).unwrap();
1141    ///         vcpu.vcpu_init(&kvi).unwrap();
1142    ///     }
1143    ///     let vcpu_events = vcpu.get_vcpu_events().unwrap();
1144    /// }
1145    /// ```
1146    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1147    pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1148        let mut vcpu_events = Default::default();
1149        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
1150        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) };
1151        if ret != 0 {
1152            return Err(errno::Error::last());
1153        }
1154        Ok(vcpu_events)
1155    }
1156
1157    /// Sets pending exceptions, interrupts, and NMIs as well as related states of the vcpu.
1158    ///
1159    /// See the documentation for `KVM_SET_VCPU_EVENTS` in the
1160    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1161    ///
1162    /// # Arguments
1163    ///
1164    /// * `kvm_vcpu_events` - vcpu events to be written.
1165    ///
1166    /// # Example
1167    ///
1168    /// ```rust
1169    /// # use kvm_ioctls::{Kvm, Cap};
1170    /// let kvm = Kvm::new().unwrap();
1171    /// if kvm.check_extension(Cap::VcpuEvents) {
1172    ///     let vm = kvm.create_vm().unwrap();
1173    ///     let vcpu = vm.create_vcpu(0).unwrap();
1174    ///     // On arm64, vCPU needs to be initialized before accesing events
1175    ///     #[cfg(target_arch = "aarch64")]
1176    ///     {
1177    ///         let mut kvi = kvm_bindings::kvm_vcpu_init::default();
1178    ///         vm.get_preferred_target(&mut kvi).unwrap();
1179    ///         vcpu.vcpu_init(&kvi).unwrap();
1180    ///     }
1181    ///     let vcpu_events = Default::default();
1182    ///     // Your `vcpu_events` manipulation here.
1183    ///     vcpu.set_vcpu_events(&vcpu_events).unwrap();
1184    /// }
1185    /// ```
1186    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1187    pub fn set_vcpu_events(&self, vcpu_events: &kvm_vcpu_events) -> Result<()> {
1188        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
1189        let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) };
1190        if ret != 0 {
1191            return Err(errno::Error::last());
1192        }
1193        Ok(())
1194    }
1195
1196    /// Sets the type of CPU to be exposed to the guest and optional features.
1197    ///
1198    /// This initializes an ARM vCPU to the specified type with the specified features
1199    /// and resets the values of all of its registers to defaults. See the documentation for
1200    /// `KVM_ARM_VCPU_INIT`.
1201    ///
1202    /// # Arguments
1203    ///
1204    /// * `kvi` - information about preferred CPU target type and recommended features for it.
1205    ///   For details check the `kvm_vcpu_init` structure in the
1206    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1207    ///
1208    /// # Example
1209    /// ```rust
1210    /// # use kvm_ioctls::Kvm;
1211    /// use kvm_bindings::kvm_vcpu_init;
1212    /// let kvm = Kvm::new().unwrap();
1213    /// let vm = kvm.create_vm().unwrap();
1214    /// let vcpu = vm.create_vcpu(0).unwrap();
1215    ///
1216    /// let mut kvi = kvm_vcpu_init::default();
1217    /// vm.get_preferred_target(&mut kvi).unwrap();
1218    /// vcpu.vcpu_init(&kvi).unwrap();
1219    /// ```
1220    #[cfg(target_arch = "aarch64")]
1221    pub fn vcpu_init(&self, kvi: &kvm_vcpu_init) -> Result<()> {
1222        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1223        // exactly the size of the struct.
1224        let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT(), kvi) };
1225        if ret < 0 {
1226            return Err(errno::Error::last());
1227        }
1228        Ok(())
1229    }
1230
1231    /// Finalizes the configuration of the specified vcpu feature.
1232    ///
1233    /// The vcpu must already have been initialised, enabling the affected feature,
1234    /// by means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set
1235    /// in features[].
1236    ///
1237    /// For affected vcpu features, this is a mandatory step that must be performed before
1238    /// the vcpu is fully usable.
1239    ///
1240    /// Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be configured
1241    /// by use of ioctls such as KVM_SET_ONE_REG. The exact configuration that should be
1242    /// performaned and how to do it are feature-dependent.
1243    ///
1244    /// Other calls that depend on a particular feature being finalized, such as KVM_RUN,
1245    /// KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with -EPERM unless
1246    /// the feature has already been finalized by means of a KVM_ARM_VCPU_FINALIZE call.
1247    ///
1248    /// See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization using this ioctl.
1249    /// [KVM_ARM_VCPU_FINALIZE](https://www.kernel.org/doc/html/latest/virt/kvm/api.html#kvm-arm-vcpu-finalize).
1250    ///
1251    /// # Arguments
1252    ///
1253    /// * `feature` - vCPU features that needs to be finalized.
1254    ///
1255    /// # Example
1256    /// ```rust
1257    /// # use kvm_ioctls::Kvm;
1258    /// use std::arch::is_aarch64_feature_detected;
1259    ///
1260    /// use kvm_bindings::{KVM_ARM_VCPU_SVE, kvm_vcpu_init};
1261    /// let kvm = Kvm::new().unwrap();
1262    /// let vm = kvm.create_vm().unwrap();
1263    /// let vcpu = vm.create_vcpu(0).unwrap();
1264    ///
1265    /// let mut kvi = kvm_vcpu_init::default();
1266    /// vm.get_preferred_target(&mut kvi).unwrap();
1267    /// kvi.features[0] |= 1 << KVM_ARM_VCPU_SVE;
1268    /// if is_aarch64_feature_detected!("sve2") || is_aarch64_feature_detected!("sve") {
1269    ///     vcpu.vcpu_init(&kvi).unwrap();
1270    ///     let feature = KVM_ARM_VCPU_SVE as i32;
1271    ///     vcpu.vcpu_finalize(&feature).unwrap();
1272    /// }
1273    /// ```
1274    #[cfg(target_arch = "aarch64")]
1275    pub fn vcpu_finalize(&self, feature: &std::os::raw::c_int) -> Result<()> {
1276        // SAFETY: This is safe because we know the kernel will only read this
1277        // parameter to select the correct finalization case in KVM.
1278        let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_FINALIZE(), feature) };
1279        if ret < 0 {
1280            return Err(errno::Error::last());
1281        }
1282        Ok(())
1283    }
1284
1285    /// Returns the guest registers that are supported for the
1286    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1287    ///
1288    /// # Arguments
1289    ///
1290    /// * `reg_list`  - list of registers (input/output). For details check the `kvm_reg_list`
1291    ///   structure in the
1292    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1293    ///
1294    /// # Example
1295    ///
1296    /// ```rust
1297    /// # use kvm_ioctls::Kvm;
1298    /// # use kvm_bindings::RegList;
1299    /// let kvm = Kvm::new().unwrap();
1300    /// let vm = kvm.create_vm().unwrap();
1301    /// let vcpu = vm.create_vcpu(0).unwrap();
1302    ///
1303    /// // KVM_GET_REG_LIST on Aarch64 demands that the vcpus be initialized.
1304    /// # #[cfg(target_arch = "aarch64")]
1305    /// # {
1306    /// let mut kvi = kvm_bindings::kvm_vcpu_init::default();
1307    /// vm.get_preferred_target(&mut kvi).unwrap();
1308    /// vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
1309    ///
1310    /// let mut reg_list = RegList::new(500).unwrap();
1311    /// vcpu.get_reg_list(&mut reg_list).unwrap();
1312    /// assert!(reg_list.as_fam_struct_ref().n > 0);
1313    /// # }
1314    /// ```
1315    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1316    pub fn get_reg_list(&self, reg_list: &mut RegList) -> Result<()> {
1317        let ret =
1318            // SAFETY: This is safe because we allocated the struct and we trust the kernel will read
1319            // exactly the size of the struct.
1320            unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST(), reg_list.as_mut_fam_struct()) };
1321        if ret < 0 {
1322            return Err(errno::Error::last());
1323        }
1324        Ok(())
1325    }
1326
1327    /// Sets processor-specific debug registers and configures the vcpu for handling
1328    /// certain guest debug events using the `KVM_SET_GUEST_DEBUG` ioctl.
1329    ///
1330    /// # Arguments
1331    ///
1332    /// * `debug_struct` - control bitfields and debug registers, depending on the specific architecture.
1333    ///   For details check the `kvm_guest_debug` structure in the
1334    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1335    ///
1336    /// # Example
1337    ///
1338    /// ```rust
1339    /// # use kvm_ioctls::Kvm;
1340    /// # use kvm_bindings::{
1341    /// #     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_SW_BP, kvm_guest_debug_arch, kvm_guest_debug
1342    /// # };
1343    /// let kvm = Kvm::new().unwrap();
1344    /// let vm = kvm.create_vm().unwrap();
1345    /// let vcpu = vm.create_vcpu(0).unwrap();
1346    ///
1347    /// let debug_struct = kvm_guest_debug {
1348    ///     // Configure the vcpu so that a KVM_DEBUG_EXIT would be generated
1349    ///     // when encountering a software breakpoint during execution
1350    ///     control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
1351    ///     pad: 0,
1352    ///     // Reset all arch-specific debug registers
1353    ///     arch: Default::default(),
1354    /// };
1355    ///
1356    /// vcpu.set_guest_debug(&debug_struct).unwrap();
1357    /// ```
1358    #[cfg(any(
1359        target_arch = "x86_64",
1360        target_arch = "aarch64",
1361        target_arch = "s390x",
1362        target_arch = "powerpc"
1363    ))]
1364    pub fn set_guest_debug(&self, debug_struct: &kvm_guest_debug) -> Result<()> {
1365        // SAFETY: Safe because we allocated the structure and we trust the kernel.
1366        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), debug_struct) };
1367        if ret < 0 {
1368            return Err(errno::Error::last());
1369        }
1370        Ok(())
1371    }
1372
1373    /// Sets the value of one register for this vCPU.
1374    ///
1375    /// The id of the register is encoded as specified in the kernel documentation
1376    /// for `KVM_SET_ONE_REG`.
1377    ///
1378    /// # Arguments
1379    ///
1380    /// * `reg_id` - ID of the register for which we are setting the value.
1381    /// * `data` - byte slice where the register value will be written to.
1382    ///
1383    /// # Note
1384    ///
1385    /// `data` should be equal or bigger then the register size
1386    /// oterwise function will return EINVAL error
1387    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1388    pub fn set_one_reg(&self, reg_id: u64, data: &[u8]) -> Result<usize> {
1389        let reg_size = reg_size(reg_id);
1390        if data.len() < reg_size {
1391            return Err(errno::Error::new(libc::EINVAL));
1392        }
1393        let onereg = kvm_one_reg {
1394            id: reg_id,
1395            addr: data.as_ptr() as u64,
1396        };
1397        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1398        // exactly the size of the struct.
1399        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
1400        if ret < 0 {
1401            return Err(errno::Error::last());
1402        }
1403        Ok(reg_size)
1404    }
1405
1406    /// Writes the value of the specified vCPU register into provided buffer.
1407    ///
1408    /// The id of the register is encoded as specified in the kernel documentation
1409    /// for `KVM_GET_ONE_REG`.
1410    ///
1411    /// # Arguments
1412    ///
1413    /// * `reg_id` - ID of the register.
1414    /// * `data` - byte slice where the register value will be written to.
1415    /// # Note
1416    ///
1417    /// `data` should be equal or bigger then the register size
1418    /// oterwise function will return EINVAL error
1419    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1420    pub fn get_one_reg(&self, reg_id: u64, data: &mut [u8]) -> Result<usize> {
1421        let reg_size = reg_size(reg_id);
1422        if data.len() < reg_size {
1423            return Err(errno::Error::new(libc::EINVAL));
1424        }
1425        let mut onereg = kvm_one_reg {
1426            id: reg_id,
1427            addr: data.as_ptr() as u64,
1428        };
1429        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1430        // exactly the size of the struct.
1431        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_ONE_REG(), &mut onereg) };
1432        if ret < 0 {
1433            return Err(errno::Error::last());
1434        }
1435        Ok(reg_size)
1436    }
1437
1438    /// Notify the guest about the vCPU being paused.
1439    ///
1440    /// See the documentation for `KVM_KVMCLOCK_CTRL` in the
1441    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1442    #[cfg(target_arch = "x86_64")]
1443    pub fn kvmclock_ctrl(&self) -> Result<()> {
1444        // SAFETY: Safe because we know that our file is a KVM fd and that the request
1445        // is one of the ones defined by kernel.
1446        let ret = unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) };
1447        if ret != 0 {
1448            return Err(errno::Error::last());
1449        }
1450        Ok(())
1451    }
1452
1453    /// Triggers the running of the current virtual CPU returning an exit reason.
1454    ///
1455    /// See documentation for `KVM_RUN`.
1456    ///
1457    /// # Example
1458    ///
1459    /// Running some dummy code on x86_64 that immediately halts the vCPU. Based on
1460    /// [https://lwn.net/Articles/658511/](https://lwn.net/Articles/658511/).
1461    ///
1462    /// ```rust
1463    /// # use std::io::Write;
1464    /// # use std::ptr::null_mut;
1465    /// # use std::slice;
1466    /// # use kvm_ioctls::{Kvm, VcpuExit};
1467    /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES};
1468    /// # let kvm = Kvm::new().unwrap();
1469    /// # let vm = kvm.create_vm().unwrap();
1470    ///
1471    /// # #[cfg(target_arch = "x86_64")]
1472    /// # {
1473    /// let mem_size = 0x4000;
1474    /// let guest_addr: u64 = 0x1000;
1475    /// let load_addr: *mut u8 = unsafe {
1476    ///     libc::mmap(
1477    ///         null_mut(),
1478    ///         mem_size,
1479    ///         libc::PROT_READ | libc::PROT_WRITE,
1480    ///         libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
1481    ///         -1,
1482    ///         0,
1483    ///     ) as *mut u8
1484    /// };
1485    ///
1486    /// let mem_region = kvm_userspace_memory_region {
1487    ///     slot: 0,
1488    ///     guest_phys_addr: guest_addr,
1489    ///     memory_size: mem_size as u64,
1490    ///     userspace_addr: load_addr as u64,
1491    ///     flags: 0,
1492    /// };
1493    /// unsafe { vm.set_user_memory_region(mem_region).unwrap() };
1494    ///
1495    /// // Dummy x86 code that just calls halt.
1496    /// let x86_code = [0xf4 /* hlt */];
1497    ///
1498    /// // Write the code in the guest memory. This will generate a dirty page.
1499    /// unsafe {
1500    ///     let mut slice = slice::from_raw_parts_mut(load_addr, mem_size);
1501    ///     slice.write(&x86_code).unwrap();
1502    /// }
1503    ///
1504    /// let mut vcpu_fd = vm.create_vcpu(0).unwrap();
1505    ///
1506    /// let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
1507    /// vcpu_sregs.cs.base = 0;
1508    /// vcpu_sregs.cs.selector = 0;
1509    /// vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
1510    ///
1511    /// let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
1512    /// // Set the Instruction Pointer to the guest address where we loaded the code.
1513    /// vcpu_regs.rip = guest_addr;
1514    /// vcpu_regs.rax = 2;
1515    /// vcpu_regs.rbx = 3;
1516    /// vcpu_regs.rflags = 2;
1517    /// vcpu_fd.set_regs(&vcpu_regs).unwrap();
1518    ///
1519    /// loop {
1520    ///     match vcpu_fd.run().expect("run failed") {
1521    ///         VcpuExit::Hlt => {
1522    ///             break;
1523    ///         }
1524    ///         exit_reason => panic!("unexpected exit reason: {:?}", exit_reason),
1525    ///     }
1526    /// }
1527    /// # }
1528    /// ```
1529    pub fn run(&mut self) -> Result<VcpuExit<'_>> {
1530        // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result.
1531        let ret = unsafe { ioctl(self, KVM_RUN()) };
1532        if ret == 0 {
1533            let run = self.kvm_run_ptr.as_mut_ref();
1534            match run.exit_reason {
1535                // make sure you treat all possible exit reasons from include/uapi/linux/kvm.h corresponding
1536                // when upgrading to a different kernel version
1537                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
1538                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
1539                KVM_EXIT_IO => {
1540                    let run_start = run as *mut kvm_run as *mut u8;
1541                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1542                    // which union field to use.
1543                    let io = unsafe { run.__bindgen_anon_1.io };
1544                    let port = io.port;
1545                    let data_size = io.count as usize * io.size as usize;
1546                    // SAFETY: The data_offset is defined by the kernel to be some number of bytes
1547                    // into the kvm_run stucture, which we have fully mmap'd.
1548                    let data_ptr = unsafe { run_start.offset(io.data_offset as isize) };
1549                    let data_slice =
1550                        // SAFETY: The slice's lifetime is limited to the lifetime of this vCPU, which is equal
1551                        // to the mmap of the `kvm_run` struct that this is slicing from.
1552                        unsafe { std::slice::from_raw_parts_mut::<u8>(data_ptr, data_size) };
1553                    match u32::from(io.direction) {
1554                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn(port, data_slice)),
1555                        KVM_EXIT_IO_OUT => Ok(VcpuExit::IoOut(port, data_slice)),
1556                        _ => Err(errno::Error::new(EINVAL)),
1557                    }
1558                }
1559                KVM_EXIT_HYPERCALL => {
1560                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1561                    // which union field to use.
1562                    let hypercall = unsafe { &mut run.__bindgen_anon_1.hypercall };
1563                    Ok(VcpuExit::Hypercall(HypercallExit {
1564                        nr: hypercall.nr,
1565                        args: hypercall.args,
1566                        ret: &mut hypercall.ret,
1567                        // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1568                        // which union field to use.
1569                        longmode: unsafe { hypercall.__bindgen_anon_1.longmode },
1570                    }))
1571                }
1572                KVM_EXIT_DEBUG => {
1573                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1574                    // which union field to use.
1575                    let debug = unsafe { run.__bindgen_anon_1.debug };
1576                    Ok(VcpuExit::Debug(debug.arch))
1577                }
1578                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
1579                KVM_EXIT_MMIO => {
1580                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1581                    // which union field to use.
1582                    let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
1583                    let addr = mmio.phys_addr;
1584                    let len = mmio.len as usize;
1585                    let data_slice = &mut mmio.data[..len];
1586                    if mmio.is_write != 0 {
1587                        Ok(VcpuExit::MmioWrite(addr, data_slice))
1588                    } else {
1589                        Ok(VcpuExit::MmioRead(addr, data_slice))
1590                    }
1591                }
1592                KVM_EXIT_X86_RDMSR => {
1593                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1594                    // which union field to use.
1595                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1596                    let exit = ReadMsrExit {
1597                        error: &mut msr.error,
1598                        reason: MsrExitReason::from_bits_truncate(msr.reason),
1599                        index: msr.index,
1600                        data: &mut msr.data,
1601                    };
1602                    Ok(VcpuExit::X86Rdmsr(exit))
1603                }
1604                KVM_EXIT_X86_WRMSR => {
1605                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1606                    // which union field to use.
1607                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
1608                    let exit = WriteMsrExit {
1609                        error: &mut msr.error,
1610                        reason: MsrExitReason::from_bits_truncate(msr.reason),
1611                        index: msr.index,
1612                        data: msr.data,
1613                    };
1614                    Ok(VcpuExit::X86Wrmsr(exit))
1615                }
1616                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
1617                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
1618                KVM_EXIT_FAIL_ENTRY => {
1619                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1620                    // which union field to use.
1621                    let fail_entry = unsafe { &mut run.__bindgen_anon_1.fail_entry };
1622                    Ok(VcpuExit::FailEntry(
1623                        fail_entry.hardware_entry_failure_reason,
1624                        fail_entry.cpu,
1625                    ))
1626                }
1627                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
1628                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
1629                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
1630                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
1631                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
1632                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
1633                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
1634                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
1635                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
1636                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
1637                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
1638                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
1639                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
1640                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
1641                KVM_EXIT_SYSTEM_EVENT => {
1642                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1643                    // which union field to use.
1644                    let system_event = unsafe { &mut run.__bindgen_anon_1.system_event };
1645                    let ndata = system_event.ndata;
1646                    // SAFETY: Safe because we only populate with valid data (based on ndata)
1647                    let data = unsafe { &system_event.__bindgen_anon_1.data[0..ndata as usize] };
1648                    Ok(VcpuExit::SystemEvent(system_event.type_, data))
1649                }
1650                KVM_EXIT_S390_STSI => Ok(VcpuExit::S390Stsi),
1651                KVM_EXIT_IOAPIC_EOI => {
1652                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1653                    // which union field to use.
1654                    let eoi = unsafe { &mut run.__bindgen_anon_1.eoi };
1655                    Ok(VcpuExit::IoapicEoi(eoi.vector))
1656                }
1657                KVM_EXIT_HYPERV => Ok(VcpuExit::Hyperv),
1658                r => Ok(VcpuExit::Unsupported(r)),
1659            }
1660        } else {
1661            let errno = errno::Error::last();
1662            let run = self.kvm_run_ptr.as_mut_ref();
1663            // From https://docs.kernel.org/virt/kvm/api.html#kvm-run :
1664            //
1665            // KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it accompanies
1666            // a return code of ‘-1’, not ‘0’! errno will always be set to EFAULT or EHWPOISON
1667            // when KVM exits with KVM_EXIT_MEMORY_FAULT, userspace should assume kvm_run.exit_reason
1668            // is stale/undefined for all other error numbers.
1669            if ret == -1
1670                && (errno == errno::Error::new(libc::EFAULT)
1671                    || errno == errno::Error::new(libc::EHWPOISON))
1672                && run.exit_reason == KVM_EXIT_MEMORY_FAULT
1673            {
1674                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
1675                // which union field to use.
1676                let fault = unsafe { &mut run.__bindgen_anon_1.memory_fault };
1677                Ok(VcpuExit::MemoryFault {
1678                    flags: fault.flags,
1679                    gpa: fault.gpa,
1680                    size: fault.size,
1681                })
1682            } else {
1683                Err(errno)
1684            }
1685        }
1686    }
1687
1688    /// Returns a mutable reference to the kvm_run structure
1689    pub fn get_kvm_run(&mut self) -> &mut kvm_run {
1690        self.kvm_run_ptr.as_mut_ref()
1691    }
1692
1693    /// Sets the `immediate_exit` flag on the `kvm_run` struct associated with this vCPU to `val`.
1694    pub fn set_kvm_immediate_exit(&mut self, val: u8) {
1695        let kvm_run = self.kvm_run_ptr.as_mut_ref();
1696        kvm_run.immediate_exit = val;
1697    }
1698
1699    /// Returns the vCPU TSC frequency in KHz or an error if the host has unstable TSC.
1700    ///
1701    /// # Example
1702    ///
1703    ///  ```rust
1704    /// # use kvm_ioctls::Kvm;
1705    /// let kvm = Kvm::new().unwrap();
1706    /// let vm = kvm.create_vm().unwrap();
1707    /// let vcpu = vm.create_vcpu(0).unwrap();
1708    /// let tsc_khz = vcpu.get_tsc_khz().unwrap();
1709    /// ```
1710    ///
1711    #[cfg(target_arch = "x86_64")]
1712    pub fn get_tsc_khz(&self) -> Result<u32> {
1713        // SAFETY:  Safe because we know that our file is a KVM fd and that the request is one of
1714        // the ones defined by kernel.
1715        let ret = unsafe { ioctl(self, KVM_GET_TSC_KHZ()) };
1716        if ret >= 0 {
1717            Ok(ret as u32)
1718        } else {
1719            Err(errno::Error::new(ret))
1720        }
1721    }
1722
1723    /// Sets the specified vCPU TSC frequency.
1724    ///
1725    /// # Arguments
1726    ///
1727    /// * `freq` - The frequency unit is KHz as per the KVM API documentation
1728    ///   for `KVM_SET_TSC_KHZ`.
1729    ///
1730    /// # Example
1731    ///
1732    ///  ```rust
1733    /// # use kvm_ioctls::{Cap, Kvm};
1734    /// let kvm = Kvm::new().unwrap();
1735    /// let vm = kvm.create_vm().unwrap();
1736    /// let vcpu = vm.create_vcpu(0).unwrap();
1737    /// if kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl) {
1738    ///     vcpu.set_tsc_khz(1000).unwrap();
1739    /// }
1740    /// ```
1741    ///
1742    #[cfg(target_arch = "x86_64")]
1743    pub fn set_tsc_khz(&self, freq: u32) -> Result<()> {
1744        // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of
1745        // the ones defined by kernel.
1746        let ret = unsafe { ioctl_with_val(self, KVM_SET_TSC_KHZ(), freq as u64) };
1747        if ret < 0 {
1748            Err(errno::Error::last())
1749        } else {
1750            Ok(())
1751        }
1752    }
1753
1754    /// Translates a virtual address according to the vCPU's current address translation mode.
1755    ///
1756    /// The physical address is returned in a `kvm_translation` structure as defined in the
1757    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1758    /// See documentation for `KVM_TRANSLATE`.
1759    ///
1760    /// # Arguments
1761    ///
1762    /// * `gva` - The virtual address to translate.
1763    ///
1764    /// # Example
1765    ///
1766    /// ```rust
1767    /// # use kvm_ioctls::Kvm;
1768    /// let kvm = Kvm::new().unwrap();
1769    /// let vm = kvm.create_vm().unwrap();
1770    /// let vcpu = vm.create_vcpu(0).unwrap();
1771    /// #[cfg(target_arch = "x86_64")]
1772    /// let tr = vcpu.translate_gva(0x10000).unwrap();
1773    /// ```
1774    #[cfg(target_arch = "x86_64")]
1775    pub fn translate_gva(&self, gva: u64) -> Result<kvm_translation> {
1776        let mut tr = kvm_translation {
1777            linear_address: gva,
1778            ..Default::default()
1779        };
1780
1781        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
1782        // write the correct amount of memory to our pointer, and we verify the return result.
1783        let ret = unsafe { ioctl_with_mut_ref(self, KVM_TRANSLATE(), &mut tr) };
1784        if ret != 0 {
1785            return Err(errno::Error::last());
1786        }
1787        Ok(tr)
1788    }
1789
1790    /// Enable the given [`SyncReg`] to be copied to userspace on the next exit
1791    ///
1792    /// # Arguments
1793    ///
1794    /// * `reg` - The [`SyncReg`] to copy out of the guest
1795    ///
1796    /// # Example
1797    ///
1798    ///  ```rust
1799    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1800    /// let kvm = Kvm::new().unwrap();
1801    /// let vm = kvm.create_vm().unwrap();
1802    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1803    /// vcpu.set_sync_valid_reg(SyncReg::Register);
1804    /// vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
1805    /// vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
1806    /// ```
1807    #[cfg(target_arch = "x86_64")]
1808    pub fn set_sync_valid_reg(&mut self, reg: SyncReg) {
1809        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1810        kvm_run.kvm_valid_regs |= reg as u64;
1811    }
1812
1813    /// Tell KVM to copy the given [`SyncReg`] into the guest on the next entry
1814    ///
1815    /// # Arguments
1816    ///
1817    /// * `reg` - The [`SyncReg`] to copy into the guest
1818    ///
1819    /// # Example
1820    ///
1821    ///  ```rust
1822    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1823    /// let kvm = Kvm::new().unwrap();
1824    /// let vm = kvm.create_vm().unwrap();
1825    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1826    /// vcpu.set_sync_dirty_reg(SyncReg::Register);
1827    /// ```
1828    #[cfg(target_arch = "x86_64")]
1829    pub fn set_sync_dirty_reg(&mut self, reg: SyncReg) {
1830        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1831        kvm_run.kvm_dirty_regs |= reg as u64;
1832    }
1833
1834    /// Disable the given [`SyncReg`] to be copied to userspace on the next exit
1835    ///
1836    /// # Arguments
1837    ///
1838    /// * `reg` - The [`SyncReg`] to not copy out of the guest
1839    ///
1840    /// # Example
1841    ///
1842    ///  ```rust
1843    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1844    /// let kvm = Kvm::new().unwrap();
1845    /// let vm = kvm.create_vm().unwrap();
1846    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1847    /// vcpu.clear_sync_valid_reg(SyncReg::Register);
1848    /// ```
1849    #[cfg(target_arch = "x86_64")]
1850    pub fn clear_sync_valid_reg(&mut self, reg: SyncReg) {
1851        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1852        kvm_run.kvm_valid_regs &= !(reg as u64);
1853    }
1854
1855    /// Tell KVM to not copy the given [`SyncReg`] into the guest on the next entry
1856    ///
1857    /// # Arguments
1858    ///
    /// * `reg` - The [`SyncReg`] to not copy into the guest
1860    ///
1861    /// # Example
1862    ///
1863    ///  ```rust
1864    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1865    /// let kvm = Kvm::new().unwrap();
1866    /// let vm = kvm.create_vm().unwrap();
1867    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1868    /// vcpu.clear_sync_dirty_reg(SyncReg::Register);
1869    /// ```
1870    #[cfg(target_arch = "x86_64")]
1871    pub fn clear_sync_dirty_reg(&mut self, reg: SyncReg) {
1872        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1873        kvm_run.kvm_dirty_regs &= !(reg as u64);
1874    }
1875
1876    /// Get the [`kvm_sync_regs`] from the VM
1877    ///
1878    /// # Example
1879    ///
1880    ///  ```rust
1881    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1882    /// let kvm = Kvm::new().unwrap();
1883    /// let vm = kvm.create_vm().unwrap();
1884    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1885    /// if kvm.check_extension(Cap::SyncRegs) {
1886    ///     vcpu.set_sync_valid_reg(SyncReg::Register);
1887    ///     vcpu.run();
1888    ///     let guest_rax = vcpu.sync_regs().regs.rax;
1889    /// }
1890    /// ```
1891    #[cfg(target_arch = "x86_64")]
1892    pub fn sync_regs(&self) -> kvm_sync_regs {
1893        let kvm_run = self.kvm_run_ptr.as_ref();
1894
1895        // SAFETY: Accessing this union field could be out of bounds if the `kvm_run`
1896        // allocation isn't large enough. The `kvm_run` region is set using
1897        // `get_vcpu_map_size`, so this region is in bounds
1898        unsafe { kvm_run.s.regs }
1899    }
1900
1901    /// Get a mutable reference to the [`kvm_sync_regs`] from the VM
1902    ///
1903    /// # Example
1904    ///
1905    ///  ```rust
1906    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1907    /// let kvm = Kvm::new().unwrap();
1908    /// let vm = kvm.create_vm().unwrap();
1909    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1910    /// if kvm.check_extension(Cap::SyncRegs) {
1911    ///     vcpu.set_sync_valid_reg(SyncReg::Register);
1912    ///     vcpu.run();
1913    ///     // Set the guest RAX to 0xdeadbeef
1914    ///     vcpu.sync_regs_mut().regs.rax = 0xdeadbeef;
1915    ///     vcpu.set_sync_dirty_reg(SyncReg::Register);
1916    ///     vcpu.run();
1917    /// }
1918    /// ```
1919    #[cfg(target_arch = "x86_64")]
1920    pub fn sync_regs_mut(&mut self) -> &mut kvm_sync_regs {
1921        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1922
1923        // SAFETY: Accessing this union field could be out of bounds if the `kvm_run`
1924        // allocation isn't large enough. The `kvm_run` region is set using
1925        // `get_vcpu_map_size`, so this region is in bounds
1926        unsafe { &mut kvm_run.s.regs }
1927    }
1928
1929    /// Triggers an SMI on the virtual CPU.
1930    ///
1931    /// See documentation for `KVM_SMI`.
1932    ///
1933    /// ```rust
1934    /// # use kvm_ioctls::{Kvm, Cap};
1935    /// let kvm = Kvm::new().unwrap();
1936    /// let vm = kvm.create_vm().unwrap();
1937    /// let vcpu = vm.create_vcpu(0).unwrap();
1938    /// if kvm.check_extension(Cap::X86Smm) {
1939    ///     vcpu.smi().unwrap();
1940    /// }
1941    /// ```
1942    #[cfg(target_arch = "x86_64")]
1943    pub fn smi(&self) -> Result<()> {
1944        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
1945        let ret = unsafe { ioctl(self, KVM_SMI()) };
1946        match ret {
1947            0 => Ok(()),
1948            _ => Err(errno::Error::last()),
1949        }
1950    }
1951
1952    /// Returns the nested guest state using the `KVM_GET_NESTED_STATE` ioctl.
1953    ///
1954    /// This only works when `KVM_CAP_NESTED_STATE` is available.
1955    ///
1956    /// # Arguments
1957    ///
1958    /// - `buffer`: The buffer to be filled with the new nested state.
1959    ///
1960    /// # Return Value
1961    /// If this returns `None`, KVM doesn't have nested state. Otherwise, the
1962    /// actual length of the state is returned.
1963    ///
1964    /// # Example
1965    ///
1966    /// ```rust
1967    /// # use kvm_ioctls::{Kvm, Cap, KvmNestedStateBuffer};
1968    /// let kvm = Kvm::new().unwrap();
1969    /// let vm = kvm.create_vm().unwrap();
1970    /// let vcpu = vm.create_vcpu(0).unwrap();
1971    /// let mut state_buffer = KvmNestedStateBuffer::empty();
1972    /// if kvm.check_extension(Cap::NestedState) {
1973    ///     vcpu.nested_state(&mut state_buffer).unwrap();
1974    ///     // Next, serialize the actual state into a file or so.
1975    /// }
1976    /// ```
1977    #[cfg(target_arch = "x86_64")]
1978    pub fn nested_state(
1979        &self,
1980        buffer: &mut KvmNestedStateBuffer,
1981    ) -> Result<Option<NonZeroUsize /* actual length of state */>> {
1982        assert_ne!(buffer.size, 0, "buffer should not report a size of zero");
1983
1984        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
1985        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_NESTED_STATE(), buffer) };
1986        match ret {
1987            0 => {
1988                let size = buffer.size as usize;
1989                let just_hdr_size = size_of::<kvm_nested_state>();
1990                if size <= just_hdr_size {
1991                    Ok(None)
1992                } else {
1993                    Ok(Some(NonZeroUsize::new(size).unwrap()))
1994                }
1995            }
1996            _ => Err(errno::Error::last()),
1997        }
1998    }
1999
2000    /// Sets the nested guest state using the `KVM_SET_NESTED_STATE` ioctl.
2001    ///
2002    /// This only works when  `KVM_CAP_NESTED_STATE` is available.
2003    ///
2004    /// # Arguments
2005    ///
2006    /// - `state`: The new state to be put into KVM. The header must report the
2007    ///   `size` of the state properly. The state must be retrieved first using
2008    ///   [`Self::nested_state`].
2009    ///
2010    /// # Example
2011    ///
2012    /// ```rust
2013    /// # use kvm_ioctls::{Kvm, Cap, KvmNestedStateBuffer};
2014    /// let kvm = Kvm::new().unwrap();
2015    /// let vm = kvm.create_vm().unwrap();
2016    /// let vcpu = vm.create_vcpu(0).unwrap();
2017    /// if kvm.check_extension(Cap::NestedState) {
2018    ///     let mut state_buffer = KvmNestedStateBuffer::empty();
2019    ///     vcpu.nested_state(&mut state_buffer).unwrap();
2020    ///     // Rename the variable to better reflect the role.
2021    ///     let old_state = state_buffer;
2022    ///
2023    ///     // now assume we transfer the state to a new location
2024    ///     // and load it back into kvm:
2025    ///     vcpu.set_nested_state(&old_state).unwrap();
2026    /// }
2027    /// ```
2028    #[cfg(target_arch = "x86_64")]
2029    pub fn set_nested_state(&self, state: &KvmNestedStateBuffer) -> Result<()> {
2030        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
2031        let ret = unsafe { ioctl_with_ref(self, KVM_SET_NESTED_STATE(), state) };
2032        match ret {
2033            0 => Ok(()),
2034            _ => Err(errno::Error::last()),
2035        }
2036    }
2037
2038    /// Queues an NMI on the thread's vcpu. Only usable if `KVM_CAP_USER_NMI`
2039    /// is available.
2040    ///
2041    /// See the documentation for `KVM_NMI`.
2042    ///
2043    /// # Example
2044    ///
2045    /// ```rust
2046    /// # use kvm_ioctls::{Kvm, Cap};
2047    /// let kvm = Kvm::new().unwrap();
2048    /// let vm = kvm.create_vm().unwrap();
2049    /// let vcpu = vm.create_vcpu(0).unwrap();
2050    /// if kvm.check_extension(Cap::UserNmi) {
2051    ///     vcpu.nmi().unwrap();
2052    /// }
2053    /// ```
2054    #[cfg(target_arch = "x86_64")]
2055    pub fn nmi(&self) -> Result<()> {
2056        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
2057        let ret = unsafe { ioctl(self, KVM_NMI()) };
2058        match ret {
2059            0 => Ok(()),
2060            _ => Err(errno::Error::last()),
2061        }
2062    }
2063
2064    /// Maps the coalesced MMIO ring page. This allows reading entries from
2065    /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read).
2066    ///
2067    /// # Returns
2068    ///
2069    /// Returns an error if the buffer could not be mapped, usually because
2070    /// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
2071    /// is not available.
2072    ///
2073    /// # Examples
2074    ///
2075    /// ```rust
2076    /// # use kvm_ioctls::{Kvm, Cap};
2077    /// let kvm = Kvm::new().unwrap();
2078    /// let vm = kvm.create_vm().unwrap();
2079    /// let mut vcpu = vm.create_vcpu(0).unwrap();
2080    /// if kvm.check_extension(Cap::CoalescedMmio) {
2081    ///     vcpu.map_coalesced_mmio_ring().unwrap();
2082    /// }
2083    /// ```
2084    pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> {
2085        if self.coalesced_mmio_ring.is_none() {
2086            let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?;
2087            self.coalesced_mmio_ring = Some(ring);
2088        }
2089        Ok(())
2090    }
2091
2092    /// Read a single entry from the coalesced MMIO ring.
2093    /// For entries to be appended to the ring by the kernel, addresses must be registered
2094    /// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()).
2095    ///
2096    /// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand.
2097    ///
2098    /// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`.
2099    ///
2100    /// # Returns
2101    ///
2102    /// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring)
2103    ///   was not called beforehand.
2104    /// * [`Ok<None>`] if the ring is empty.
2105    /// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read.
2106    pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> {
2107        self.coalesced_mmio_ring
2108            .as_mut()
2109            .ok_or(errno::Error::new(libc::EIO))
2110            .map(|ring| ring.read_entry())
2111    }
2112}
2113
2114/// Helper function to create a new `VcpuFd`.
2115///
2116/// This should not be exported as a public function because the preferred way is to use
2117/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
2118/// then it would be exported with the public `VcpuFd` interface.
2119pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
2120    VcpuFd {
2121        vcpu,
2122        kvm_run_ptr,
2123        coalesced_mmio_ring: None,
2124    }
2125}
2126
2127impl AsRawFd for VcpuFd {
2128    fn as_raw_fd(&self) -> RawFd {
2129        self.vcpu.as_raw_fd()
2130    }
2131}
2132
2133#[cfg(test)]
2134mod tests {
2135    #![allow(clippy::undocumented_unsafe_blocks)]
2136
2137    use super::*;
2138    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
2139    use crate::cap::Cap;
2140    use crate::ioctls::system::Kvm;
2141    use std::ptr::NonNull;
2142
2143    // Helper function for memory mapping `size` bytes of anonymous memory.
2144    // Panics if the mmap fails.
2145    fn mmap_anonymous(size: usize) -> NonNull<u8> {
2146        use std::ptr::null_mut;
2147
2148        let addr = unsafe {
2149            libc::mmap(
2150                null_mut(),
2151                size,
2152                libc::PROT_READ | libc::PROT_WRITE,
2153                libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
2154                -1,
2155                0,
2156            )
2157        };
2158        if addr == libc::MAP_FAILED {
2159            panic!("mmap failed.");
2160        }
2161
2162        NonNull::new(addr).unwrap().cast()
2163    }
2164
2165    #[test]
2166    fn test_create_vcpu() {
2167        let kvm = Kvm::new().unwrap();
2168        let vm = kvm.create_vm().unwrap();
2169
2170        vm.create_vcpu(0).unwrap();
2171    }
2172
2173    #[cfg(target_arch = "x86_64")]
2174    #[test]
2175    fn test_get_cpuid() {
2176        let kvm = Kvm::new().unwrap();
2177        if kvm.check_extension(Cap::ExtCpuid) {
2178            let vm = kvm.create_vm().unwrap();
2179            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
2180            let ncpuids = cpuid.as_slice().len();
2181            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
2182            let nr_vcpus = kvm.get_nr_vcpus();
2183            for cpu_idx in 0..nr_vcpus {
2184                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
2185                vcpu.set_cpuid2(&cpuid).unwrap();
2186                let retrieved_cpuid = vcpu.get_cpuid2(ncpuids).unwrap();
2187                // Only check the first few leafs as some (e.g. 13) are reserved.
2188                assert_eq!(cpuid.as_slice()[..3], retrieved_cpuid.as_slice()[..3]);
2189            }
2190        }
2191    }
2192
2193    #[cfg(target_arch = "x86_64")]
2194    #[test]
2195    fn test_get_cpuid_fail_num_entries_too_high() {
2196        let kvm = Kvm::new().unwrap();
2197        if kvm.check_extension(Cap::ExtCpuid) {
2198            let vm = kvm.create_vm().unwrap();
2199            let vcpu = vm.create_vcpu(0).unwrap();
2200            let err_cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES + 1_usize).err();
2201            assert_eq!(err_cpuid.unwrap().errno(), libc::ENOMEM);
2202        }
2203    }
2204
2205    #[cfg(target_arch = "x86_64")]
2206    #[test]
2207    fn test_get_cpuid_fail_num_entries_too_small() {
2208        let kvm = Kvm::new().unwrap();
2209        if kvm.check_extension(Cap::ExtCpuid) {
2210            let vm = kvm.create_vm().unwrap();
2211            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
2212            let ncpuids = cpuid.as_slice().len();
2213            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
2214            let nr_vcpus = kvm.get_nr_vcpus();
2215            for cpu_idx in 0..nr_vcpus {
2216                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
2217                vcpu.set_cpuid2(&cpuid).unwrap();
2218                let err = vcpu.get_cpuid2(ncpuids - 1_usize).err();
2219                assert_eq!(err.unwrap().errno(), libc::E2BIG);
2220            }
2221        }
2222    }
2223
2224    #[cfg(target_arch = "x86_64")]
2225    #[test]
2226    fn test_set_cpuid() {
2227        let kvm = Kvm::new().unwrap();
2228        if kvm.check_extension(Cap::ExtCpuid) {
2229            let vm = kvm.create_vm().unwrap();
2230            let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
2231            let ncpuids = cpuid.as_slice().len();
2232            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
2233            let vcpu = vm.create_vcpu(0).unwrap();
2234
2235            // Setting Manufacturer ID
2236            {
2237                let entries = cpuid.as_mut_slice();
2238                for entry in entries.iter_mut() {
2239                    if entry.function == 0 {
2240                        // " KVMKVMKVM "
2241                        entry.ebx = 0x4b4d564b;
2242                        entry.ecx = 0x564b4d56;
2243                        entry.edx = 0x4d;
2244                    }
2245                }
2246            }
2247            vcpu.set_cpuid2(&cpuid).unwrap();
2248            let cpuid_0 = vcpu.get_cpuid2(ncpuids).unwrap();
2249            for entry in cpuid_0.as_slice() {
2250                if entry.function == 0 {
2251                    assert_eq!(entry.ebx, 0x4b4d564b);
2252                    assert_eq!(entry.ecx, 0x564b4d56);
2253                    assert_eq!(entry.edx, 0x4d);
2254                }
2255            }
2256
2257            // Disabling Intel SHA extensions.
2258            const EBX_SHA_SHIFT: u32 = 29;
2259            let mut ebx_sha_off = 0u32;
2260            {
2261                let entries = cpuid.as_mut_slice();
2262                for entry in entries.iter_mut() {
2263                    if entry.function == 7 && entry.ecx == 0 {
2264                        entry.ebx &= !(1 << EBX_SHA_SHIFT);
2265                        ebx_sha_off = entry.ebx;
2266                    }
2267                }
2268            }
2269            vcpu.set_cpuid2(&cpuid).unwrap();
2270            let cpuid_1 = vcpu.get_cpuid2(ncpuids).unwrap();
2271            for entry in cpuid_1.as_slice() {
2272                if entry.function == 7 && entry.ecx == 0 {
2273                    assert_eq!(entry.ebx, ebx_sha_off);
2274                }
2275            }
2276        }
2277    }
2278
2279    #[cfg(target_arch = "x86_64")]
2280    #[allow(non_snake_case)]
2281    #[test]
2282    fn test_fpu() {
2283        // as per https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/fpu/internal.h
2284        let KVM_FPU_CWD: usize = 0x37f;
2285        let KVM_FPU_MXCSR: usize = 0x1f80;
2286        let kvm = Kvm::new().unwrap();
2287        let vm = kvm.create_vm().unwrap();
2288        let vcpu = vm.create_vcpu(0).unwrap();
2289        let mut fpu: kvm_fpu = kvm_fpu {
2290            fcw: KVM_FPU_CWD as u16,
2291            mxcsr: KVM_FPU_MXCSR as u32,
2292            ..Default::default()
2293        };
2294
2295        fpu.fcw = KVM_FPU_CWD as u16;
2296        fpu.mxcsr = KVM_FPU_MXCSR as u32;
2297
2298        vcpu.set_fpu(&fpu).unwrap();
2299        assert_eq!(vcpu.get_fpu().unwrap().fcw, KVM_FPU_CWD as u16);
2300    }
2301
2302    #[cfg(target_arch = "x86_64")]
2303    #[test]
2304    fn lapic_test() {
2305        use std::io::Cursor;
2306        // We might get read of byteorder if we replace mem::transmute with something safer.
2307        use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
2308        // As per https://github.com/torvalds/linux/arch/x86/kvm/lapic.c
2309        // Try to write and read the APIC_ICR (0x300) register which is non-read only and
2310        // one can simply write to it.
2311        let kvm = Kvm::new().unwrap();
2312        assert!(kvm.check_extension(Cap::Irqchip));
2313        let vm = kvm.create_vm().unwrap();
2314        // The get_lapic ioctl will fail if there is no irqchip created beforehand.
2315        vm.create_irq_chip().unwrap();
2316        let vcpu = vm.create_vcpu(0).unwrap();
2317        let mut klapic: kvm_lapic_state = vcpu.get_lapic().unwrap();
2318
2319        let reg_offset = 0x300;
2320        let value = 2_u32;
2321        //try to write and read the APIC_ICR	0x300
2322        let write_slice =
2323            unsafe { &mut *(&mut klapic.regs[reg_offset..] as *mut [i8] as *mut [u8]) };
2324        let mut writer = Cursor::new(write_slice);
2325        writer.write_u32::<LittleEndian>(value).unwrap();
2326        vcpu.set_lapic(&klapic).unwrap();
2327        klapic = vcpu.get_lapic().unwrap();
2328        let read_slice = unsafe { &*(&klapic.regs[reg_offset..] as *const [i8] as *const [u8]) };
2329        let mut reader = Cursor::new(read_slice);
2330        assert_eq!(reader.read_u32::<LittleEndian>().unwrap(), value);
2331    }
2332
2333    #[cfg(target_arch = "x86_64")]
2334    #[test]
2335    fn msrs_test() {
2336        use vmm_sys_util::fam::FamStruct;
2337        let kvm = Kvm::new().unwrap();
2338        let vm = kvm.create_vm().unwrap();
2339        let vcpu = vm.create_vcpu(0).unwrap();
2340
2341        // Set the following MSRs.
2342        let msrs_to_set = [
2343            kvm_msr_entry {
2344                index: 0x0000_0174,
2345                data: 0x0,
2346                ..Default::default()
2347            },
2348            kvm_msr_entry {
2349                index: 0x0000_0175,
2350                data: 0x1,
2351                ..Default::default()
2352            },
2353        ];
2354        let msrs_wrapper = Msrs::from_entries(&msrs_to_set).unwrap();
2355        vcpu.set_msrs(&msrs_wrapper).unwrap();
2356
2357        // Now test that GET_MSRS returns the same.
2358        // Configure the struct to say which entries we want.
2359        let mut returned_kvm_msrs = Msrs::from_entries(&[
2360            kvm_msr_entry {
2361                index: 0x0000_0174,
2362                ..Default::default()
2363            },
2364            kvm_msr_entry {
2365                index: 0x0000_0175,
2366                ..Default::default()
2367            },
2368        ])
2369        .unwrap();
2370        let nmsrs = vcpu.get_msrs(&mut returned_kvm_msrs).unwrap();
2371
2372        // Verify the lengths match.
2373        assert_eq!(nmsrs, msrs_to_set.len());
2374        assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len());
2375
2376        // Verify the contents match.
2377        let returned_kvm_msr_entries = returned_kvm_msrs.as_slice();
2378        for (i, entry) in returned_kvm_msr_entries.iter().enumerate() {
2379            assert_eq!(entry, &msrs_to_set[i]);
2380        }
2381    }
2382
2383    #[cfg(any(
2384        target_arch = "x86_64",
2385        target_arch = "aarch64",
2386        target_arch = "riscv64",
2387        target_arch = "s390x"
2388    ))]
2389    #[test]
2390    fn mpstate_test() {
2391        let kvm = Kvm::new().unwrap();
2392        let vm = kvm.create_vm().unwrap();
2393        let vcpu = vm.create_vcpu(0).unwrap();
2394        let mp_state = vcpu.get_mp_state().unwrap();
2395        vcpu.set_mp_state(mp_state).unwrap();
2396        let other_mp_state = vcpu.get_mp_state().unwrap();
2397        assert_eq!(mp_state, other_mp_state);
2398    }
2399
2400    #[cfg(target_arch = "x86_64")]
2401    #[test]
2402    fn xsave_test() {
2403        use vmm_sys_util::fam::FamStruct;
2404
2405        let kvm = Kvm::new().unwrap();
2406        let vm = kvm.create_vm().unwrap();
2407        let vcpu = vm.create_vcpu(0).unwrap();
2408        let xsave = vcpu.get_xsave().unwrap();
2409        // SAFETY: Safe because no features are enabled dynamically and `xsave` is large enough.
2410        unsafe { vcpu.set_xsave(&xsave).unwrap() };
2411        let other_xsave = vcpu.get_xsave().unwrap();
2412        assert_eq!(&xsave.region[..], &other_xsave.region[..]);
2413
2414        let xsave_size = vm.check_extension_int(Cap::Xsave2);
2415        // only if KVM_CAP_XSAVE2 is supported
2416        if xsave_size > 0 {
2417            let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
2418                .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
2419            let mut xsave2 = Xsave::new(fam_size).unwrap();
2420            // SAFETY: Safe because `xsave2` is allocated with enough space.
2421            unsafe { vcpu.get_xsave2(&mut xsave2).unwrap() };
2422            assert_eq!(
2423                &xsave.region[..],
2424                &xsave2.as_fam_struct_ref().xsave.region[..]
2425            );
2426            // SAFETY: Safe because `xsave2` is allocated with enough space.
2427            unsafe { vcpu.set_xsave2(&xsave2).unwrap() };
2428        }
2429    }
2430
2431    #[cfg(target_arch = "x86_64")]
2432    #[test]
2433    fn xcrs_test() {
2434        let kvm = Kvm::new().unwrap();
2435        let vm = kvm.create_vm().unwrap();
2436        let vcpu = vm.create_vcpu(0).unwrap();
2437        let xcrs = vcpu.get_xcrs().unwrap();
2438        vcpu.set_xcrs(&xcrs).unwrap();
2439        let other_xcrs = vcpu.get_xcrs().unwrap();
2440        assert_eq!(xcrs, other_xcrs);
2441    }
2442
2443    #[cfg(target_arch = "x86_64")]
2444    #[test]
2445    fn debugregs_test() {
2446        let kvm = Kvm::new().unwrap();
2447        let vm = kvm.create_vm().unwrap();
2448        let vcpu = vm.create_vcpu(0).unwrap();
2449        let debugregs = vcpu.get_debug_regs().unwrap();
2450        vcpu.set_debug_regs(&debugregs).unwrap();
2451        let other_debugregs = vcpu.get_debug_regs().unwrap();
2452        assert_eq!(debugregs, other_debugregs);
2453    }
2454
2455    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
2456    #[test]
2457    fn vcpu_events_test() {
2458        let kvm = Kvm::new().unwrap();
2459        if kvm.check_extension(Cap::VcpuEvents) {
2460            let vm = kvm.create_vm().unwrap();
2461            let vcpu = vm.create_vcpu(0).unwrap();
2462
2463            #[cfg(target_arch = "aarch64")]
2464            {
2465                let mut kvi = kvm_vcpu_init::default();
2466                vm.get_preferred_target(&mut kvi).unwrap();
2467                vcpu.vcpu_init(&kvi).unwrap();
2468            }
2469
2470            let vcpu_events = vcpu.get_vcpu_events().unwrap();
2471            vcpu.set_vcpu_events(&vcpu_events).unwrap();
2472            let other_vcpu_events = vcpu.get_vcpu_events().unwrap();
2473            assert_eq!(vcpu_events, other_vcpu_events);
2474        }
2475    }
2476
    // Runs a small aarch64 guest program and checks the exits it produces: an MMIO read, an
    // MMIO write (with a dirty-log check) and finally a shutdown system event.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        // Guest program; x8 and x9 are preloaded by the host further down.
        #[rustfmt::skip]
        let code = [
            0x40, 0x20, 0x80, 0x52, /* mov w0, #0x102 */
            0x00, 0x01, 0x00, 0xb9, /* str w0, [x8]; test physical memory write */
            0x81, 0x60, 0x80, 0x52, /* mov w1, #0x304 */
            0x02, 0x00, 0x80, 0x52, /* mov w2, #0x0 */
            0x20, 0x01, 0x40, 0xb9, /* ldr w0, [x9]; test MMIO read */
            0x1f, 0x18, 0x14, 0x71, /* cmp w0, #0x506 */
            0x20, 0x00, 0x82, 0x1a, /* csel w0, w1, w2, eq */
            0x20, 0x01, 0x00, 0xb9, /* str w0, [x9]; test MMIO write */
            0x00, 0x80, 0xb0, 0x52, /* mov w0, #0x84000000 */
            0x00, 0x00, 0x1d, 0x32, /* orr w0, w0, #0x08 */
            0x02, 0x00, 0x00, 0xd4, /* hvc #0x0 */
            0x00, 0x00, 0x00, 0x14, /* b <this address>; shouldn't get here, but if so loop forever */
        ];

        // One memory slot of `mem_size` bytes at guest physical address `guest_addr`, with
        // dirty-page logging enabled so the write can be verified later.
        let mem_size = 0x20000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x10000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        // SAFETY: the region describes the `mem_size` bytes obtained from `mmap_anonymous` above.
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();
        let mut kvi = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();
        // Request the PSCI 0.2 feature; the guest ends with an `hvc` call.
        kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
        vcpu_fd.vcpu_init(&kvi).unwrap();

        // Register ids used below are `core_reg_base` plus an offset: `2 * N` for xN and
        // `2 * 32` for the PC.
        let core_reg_base: u64 = 0x6030_0000_0010_0000;
        // First address past the memory slot, so guest accesses to it exit as MMIO.
        let mmio_addr: u64 = guest_addr + mem_size as u64;

        // Set the PC to the guest address where we loaded the code.
        vcpu_fd
            .set_one_reg(core_reg_base + 2 * 32, &(guest_addr as u128).to_le_bytes())
            .unwrap();

        // Set x8 and x9 to the addresses the guest test code needs
        vcpu_fd
            .set_one_reg(
                core_reg_base + 2 * 8,
                &(guest_addr as u128 + 0x10000).to_le_bytes(),
            )
            .unwrap();
        vcpu_fd
            .set_one_reg(core_reg_base + 2 * 9, &(mmio_addr as u128).to_le_bytes())
            .unwrap();

        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // Answer the read with 0x506 (little-endian), the value the guest compares
                    // against.
                    data[3] = 0x0;
                    data[2] = 0x0;
                    data[1] = 0x5;
                    data[0] = 0x6;
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // The guest writes 0x304 (little-endian) when the comparison succeeded.
                    assert_eq!(data[3], 0x0);
                    assert_eq!(data[2], 0x0);
                    assert_eq!(data[1], 0x3);
                    assert_eq!(data[0], 0x4);
                    // The code snippet dirties one page at guest_addr + 0x10000.
                    // The code page should not be dirty, as it's not written by the guest.
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 1);
                }
                VcpuExit::SystemEvent(type_, data) => {
                    // The final `hvc` is expected to surface as a shutdown event, which ends
                    // the test.
                    assert_eq!(type_, KVM_SYSTEM_EVENT_SHUTDOWN);
                    assert_eq!(data[0], 0);
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2582
    // Runs a small riscv64 guest program and checks the exits it produces: an MMIO read
    // followed by an MMIO write, after which the dirty log is verified and the test ends.
    #[cfg(target_arch = "riscv64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        // Guest program; s8 and s9 are preloaded by the host further down.
        #[rustfmt::skip]
        let code = [
            0x13, 0x05, 0x50, 0x40, // li   a0, 0x0405;
            0x23, 0x20, 0xac, 0x00, // sw   a0, 0(s8);  test physical memory write
            0x03, 0xa5, 0x0c, 0x00, // lw   a0, 0(s9);  test MMIO read
            0x93, 0x05, 0x70, 0x60, // li   a1, 0x0607;
            0x23, 0xa0, 0xbc, 0x00, // sw   a1, 0(s9);  test MMIO write
            0x6f, 0x00, 0x00, 0x00, // j .; shouldn't get here, but if so loop forever
        ];

        // One memory slot of `mem_size` bytes at guest physical address `guest_addr`, with
        // dirty-page logging enabled so the write can be verified later.
        let mem_size = 0x20000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x10000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        // SAFETY: the region describes the `mem_size` bytes obtained from `mmap_anonymous` above.
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();

        // Register ids used below are `core_reg_base` plus an offset: 0 for the PC, 24 for s8
        // and 25 for s9.
        let core_reg_base: u64 = 0x8030_0000_0200_0000;
        // First address past the memory slot, so guest accesses to it exit as MMIO.
        let mmio_addr: u64 = guest_addr + mem_size as u64;

        // Set the PC to the guest address where we loaded the code.
        vcpu_fd
            .set_one_reg(core_reg_base, &(guest_addr as u128).to_le_bytes())
            .unwrap();

        // Set s8 and s9 to the addresses the guest test code needs
        vcpu_fd
            .set_one_reg(
                core_reg_base + 24,
                &(guest_addr as u128 + 0x10000).to_le_bytes(),
            )
            .unwrap();
        vcpu_fd
            .set_one_reg(core_reg_base + 25, &(mmio_addr as u128).to_le_bytes())
            .unwrap();

        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // Answer the read with 0x0506 (little-endian).
                    data[3] = 0x0;
                    data[2] = 0x0;
                    data[1] = 0x5;
                    data[0] = 0x6;
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // The guest stores a1, which it loaded with 0x0607 (little-endian here).
                    assert_eq!(data[3], 0x0);
                    assert_eq!(data[2], 0x0);
                    assert_eq!(data[1], 0x6);
                    assert_eq!(data[0], 0x7);
                    // The code snippet dirties one page at guest_addr + 0x10000.
                    // The code page should not be dirty, as it's not written by the guest.
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 1);
                    // The MMIO write is the last exit this test waits for.
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2674
    // Runs a small x86 guest program with single-stepping enabled for its first three
    // instructions and checks every exit it produces: debug, port I/O, MMIO and finally HLT
    // (where the dirty log is verified).
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        // This example is based on https://lwn.net/Articles/658511/
        #[rustfmt::skip]
        let code = [
            0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
            0x00, 0xd8, /* add %bl, %al */
            0x04, b'0', /* add $'0', %al */
            0xee, /* out %al, %dx */
            0xec, /* in %dx, %al */
            0xc6, 0x06, 0x00, 0x80, 0x00, /* movb $0, (0x8000); This generates a MMIO Write. */
            0x8a, 0x16, 0x00, 0x80, /* movb (0x8000), %dl; This generates a MMIO Read. */
            0xc6, 0x06, 0x00, 0x20, 0x00, /* movb $0, (0x2000); Dirty one page in guest mem. */
            0xf4, /* hlt */
        ];
        // RIP expected at each single-step debug exit: the addresses following the first three
        // instructions (3, 2 and 2 bytes long), with the code loaded at 0x1000.
        let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007];

        // One memory slot covering guest physical 0x1000..0x5000, with dirty-page logging
        // enabled. Address 0x8000 lies outside of it, so accesses there exit as MMIO.
        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        // SAFETY: the region describes the `mem_size` bytes obtained from `mmap_anonymous` above.
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();

        // Zero the code segment base and selector so that RIP is used as the guest address
        // directly; the assertions document that both start out non-zero.
        let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
        assert_ne!(vcpu_sregs.cs.base, 0);
        assert_ne!(vcpu_sregs.cs.selector, 0);
        vcpu_sregs.cs.base = 0;
        vcpu_sregs.cs.selector = 0;
        vcpu_fd.set_sregs(&vcpu_sregs).unwrap();

        let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
        // Set the Instruction Pointer to the guest address where we loaded the code.
        vcpu_regs.rip = guest_addr;
        // The guest computes al = 2 + 3 + '0', i.e. the character '5', and writes it to the port.
        vcpu_regs.rax = 2;
        vcpu_regs.rbx = 3;
        vcpu_regs.rflags = 2;
        vcpu_fd.set_regs(&vcpu_regs).unwrap();

        // Enable single-stepping; it is switched off again after `expected_rips.len()` steps.
        let mut debug_struct = kvm_guest_debug {
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
            pad: 0,
            arch: kvm_guest_debug_arch {
                debugreg: [0, 0, 0, 0, 0, 0, 0, 0],
            },
        };
        vcpu_fd.set_guest_debug(&debug_struct).unwrap();

        // Number of debug exits seen so far; indexes `expected_rips`.
        let mut instr_idx = 0;
        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::IoIn(addr, data) => {
                    assert_eq!(addr, 0x3f8);
                    assert_eq!(data.len(), 1);
                }
                VcpuExit::IoOut(addr, data) => {
                    assert_eq!(addr, 0x3f8);
                    assert_eq!(data.len(), 1);
                    assert_eq!(data[0], b'5');
                }
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, 0x8000);
                    assert_eq!(data.len(), 1);
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, 0x8000);
                    assert_eq!(data.len(), 1);
                    assert_eq!(data[0], 0);
                }
                VcpuExit::Debug(debug) => {
                    if instr_idx == expected_rips.len() - 1 {
                        // Disabling debugging/single-stepping
                        debug_struct.control = 0;
                        vcpu_fd.set_guest_debug(&debug_struct).unwrap();
                    } else if instr_idx >= expected_rips.len() {
                        // Single-stepping was disabled on the last expected step, so no further
                        // debug exit should arrive.
                        unreachable!();
                    }
                    let vcpu_regs = vcpu_fd.get_regs().unwrap();
                    assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]);
                    assert_eq!(debug.exception, 1);
                    assert_eq!(debug.pc, expected_rips[instr_idx]);
                    // Check the low 16 bits of DR6
                    let mask = (1 << 16) - 1;
                    assert_eq!(debug.dr6 & mask, 0b100111111110000);
                    // Bit 10 in DR7 is always 1
                    assert_eq!(debug.dr7, 1 << 10);
                    instr_idx += 1;
                }
                VcpuExit::Hlt => {
                    // The code snippet dirties 2 pages:
                    // * one when the code itself is loaded in memory;
                    // * and one more from the `movb` that writes to address 0x2000 (the write to
                    //   0x8000 is MMIO and does not touch guest memory)
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 2);
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2801
2802    #[test]
2803    #[cfg(target_arch = "aarch64")]
2804    fn test_get_preferred_target() {
2805        let kvm = Kvm::new().unwrap();
2806        let vm = kvm.create_vm().unwrap();
2807        let vcpu = vm.create_vcpu(0).unwrap();
2808
2809        let mut kvi = kvm_vcpu_init::default();
2810
2811        vm.get_preferred_target(&mut kvi)
2812            .expect("Cannot get preferred target");
2813        vcpu.vcpu_init(&kvi).unwrap();
2814    }
2815
2816    #[test]
2817    #[cfg(target_arch = "aarch64")]
2818    fn test_set_one_reg() {
2819        let kvm = Kvm::new().unwrap();
2820        let vm = kvm.create_vm().unwrap();
2821        let vcpu = vm.create_vcpu(0).unwrap();
2822
2823        let mut kvi = kvm_vcpu_init::default();
2824        vm.get_preferred_target(&mut kvi)
2825            .expect("Cannot get preferred target");
2826        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2827        let data: u128 = 0;
2828        let reg_id: u64 = 0;
2829
2830        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
2831        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the PSTATE register (which is a
2832        // specific aarch64 register).
2833        // This regiseter is 64 bit wide (8 bytes).
2834        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
2835        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
2836            .expect("Failed to set pstate register");
2837
2838        // Trying to set 8 byte register with 7 bytes must fail.
2839        vcpu.set_one_reg(PSTATE_REG_ID, &[0_u8; 7]).unwrap_err();
2840    }
2841
2842    #[test]
2843    #[cfg(target_arch = "aarch64")]
2844    fn test_get_one_reg() {
2845        let kvm = Kvm::new().unwrap();
2846        let vm = kvm.create_vm().unwrap();
2847        let vcpu = vm.create_vcpu(0).unwrap();
2848
2849        let mut kvi = kvm_vcpu_init::default();
2850        vm.get_preferred_target(&mut kvi)
2851            .expect("Cannot get preferred target");
2852        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2853
2854        // PSR (Processor State Register) bits.
2855        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
2856        const PSR_MODE_EL1H: u64 = 0x0000_0005;
2857        const PSR_F_BIT: u64 = 0x0000_0040;
2858        const PSR_I_BIT: u64 = 0x0000_0080;
2859        const PSR_A_BIT: u64 = 0x0000_0100;
2860        const PSR_D_BIT: u64 = 0x0000_0200;
2861        const PSTATE_FAULT_BITS_64: u64 =
2862            PSR_MODE_EL1H | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
2863        let data: u128 = PSTATE_FAULT_BITS_64 as u128;
2864        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
2865        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
2866            .expect("Failed to set pstate register");
2867
2868        let mut bytes = [0_u8; 16];
2869        vcpu.get_one_reg(PSTATE_REG_ID, &mut bytes)
2870            .expect("Failed to get pstate register");
2871        let data = u128::from_le_bytes(bytes);
2872        assert_eq!(data, PSTATE_FAULT_BITS_64 as u128);
2873
2874        // Trying to get 8 byte register with 7 bytes must fail.
2875        vcpu.get_one_reg(PSTATE_REG_ID, &mut [0_u8; 7]).unwrap_err();
2876    }
2877
2878    #[test]
2879    #[cfg(target_arch = "aarch64")]
2880    fn test_get_reg_list() {
2881        let kvm = Kvm::new().unwrap();
2882        let vm = kvm.create_vm().unwrap();
2883        let vcpu = vm.create_vcpu(0).unwrap();
2884
2885        let mut reg_list = RegList::new(1).unwrap();
2886        // KVM_GET_REG_LIST demands that the vcpus be initalized, so we expect this to fail.
2887        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
2888        assert!(err.errno() == libc::ENOEXEC);
2889
2890        let mut kvi = kvm_vcpu_init::default();
2891        vm.get_preferred_target(&mut kvi)
2892            .expect("Cannot get preferred target");
2893        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2894
2895        // KVM_GET_REG_LIST offers us a number of registers for which we have
2896        // not allocated memory, so the first time it fails.
2897        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
2898        assert!(err.errno() == libc::E2BIG);
2899        // SAFETY: This structure is a result from a specific vCPU ioctl
2900        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
2901
2902        // We make use of the number of registers returned to allocate memory and
2903        // try one more time.
2904        // SAFETY: This structure is a result from a specific vCPU ioctl
2905        let mut reg_list =
2906            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
2907        vcpu.get_reg_list(&mut reg_list).unwrap()
2908    }
2909
2910    #[test]
2911    #[cfg(target_arch = "riscv64")]
2912    fn test_set_one_reg() {
2913        let kvm = Kvm::new().unwrap();
2914        let vm = kvm.create_vm().unwrap();
2915        let vcpu = vm.create_vcpu(0).unwrap();
2916
2917        let data: u128 = 0;
2918        let reg_id: u64 = 0;
2919
2920        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
2921        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the A0
2922        // register.
2923        // This regiseter is 64 bit wide (8 bytes).
2924        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
2925        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
2926            .expect("Failed to set a0 register");
2927
2928        // Trying to set 8 byte register with 7 bytes must fail.
2929        vcpu.set_one_reg(A0_REG_ID, &[0_u8; 7]).unwrap_err();
2930    }
2931
2932    #[test]
2933    #[cfg(target_arch = "riscv64")]
2934    fn test_get_one_reg() {
2935        let kvm = Kvm::new().unwrap();
2936        let vm = kvm.create_vm().unwrap();
2937        let vcpu = vm.create_vcpu(0).unwrap();
2938
2939        const PRESET: u64 = 0x7;
2940        let data: u128 = PRESET as u128;
2941        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
2942        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
2943            .expect("Failed to set a0 register");
2944
2945        let mut bytes = [0_u8; 16];
2946        vcpu.get_one_reg(A0_REG_ID, &mut bytes)
2947            .expect("Failed to get a0 register");
2948        let data = u128::from_le_bytes(bytes);
2949        assert_eq!(data, PRESET as u128);
2950
2951        // Trying to get 8 byte register with 7 bytes must fail.
2952        vcpu.get_one_reg(A0_REG_ID, &mut [0_u8; 7]).unwrap_err();
2953    }
2954
2955    #[test]
2956    #[cfg(target_arch = "riscv64")]
2957    fn test_get_reg_list() {
2958        let kvm = Kvm::new().unwrap();
2959        let vm = kvm.create_vm().unwrap();
2960        let vcpu = vm.create_vcpu(0).unwrap();
2961
2962        let mut reg_list = RegList::new(1).unwrap();
2963
2964        // KVM_GET_REG_LIST offers us a number of registers for which we have
2965        // not allocated memory, so the first time it fails.
2966        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
2967        assert!(err.errno() == libc::E2BIG);
2968        // SAFETY: This structure is a result from a specific vCPU ioctl
2969        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
2970
2971        // We make use of the number of registers returned to allocate memory and
2972        // try one more time.
2973        // SAFETY: This structure is a result from a specific vCPU ioctl
2974        let mut reg_list =
2975            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
2976        vcpu.get_reg_list(&mut reg_list).unwrap();
2977
2978        // Test get a register list contains 200 registers explicitly
2979        let mut reg_list = RegList::new(200).unwrap();
2980        vcpu.get_reg_list(&mut reg_list).unwrap();
2981    }
2982
2983    #[test]
2984    fn test_get_kvm_run() {
2985        let kvm = Kvm::new().unwrap();
2986        let vm = kvm.create_vm().unwrap();
2987        let mut vcpu = vm.create_vcpu(0).unwrap();
2988        vcpu.kvm_run_ptr.as_mut_ref().immediate_exit = 1;
2989        assert_eq!(vcpu.get_kvm_run().immediate_exit, 1);
2990    }
2991
2992    #[test]
2993    fn test_set_kvm_immediate_exit() {
2994        let kvm = Kvm::new().unwrap();
2995        let vm = kvm.create_vm().unwrap();
2996        let mut vcpu = vm.create_vcpu(0).unwrap();
2997        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 0);
2998        vcpu.set_kvm_immediate_exit(1);
2999        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 1);
3000    }
3001
3002    #[test]
3003    #[cfg(target_arch = "x86_64")]
3004    fn test_enable_cap() {
3005        let kvm = Kvm::new().unwrap();
3006        let vm = kvm.create_vm().unwrap();
3007        let mut cap = kvm_enable_cap {
3008            // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled
3009            cap: KVM_CAP_SPLIT_IRQCHIP,
3010            ..Default::default()
3011        };
3012        cap.args[0] = 24;
3013        vm.enable_cap(&cap).unwrap();
3014
3015        let vcpu = vm.create_vcpu(0).unwrap();
3016        if kvm.check_extension(Cap::HypervSynic) {
3017            let cap = kvm_enable_cap {
3018                cap: KVM_CAP_HYPERV_SYNIC,
3019                ..Default::default()
3020            };
3021            vcpu.enable_cap(&cap).unwrap();
3022        }
3023    }
3024    #[cfg(target_arch = "x86_64")]
3025    #[test]
3026    fn test_get_tsc_khz() {
3027        let kvm = Kvm::new().unwrap();
3028        let vm = kvm.create_vm().unwrap();
3029        let vcpu = vm.create_vcpu(0).unwrap();
3030
3031        if !kvm.check_extension(Cap::GetTscKhz) {
3032            vcpu.get_tsc_khz().unwrap_err();
3033        } else {
3034            assert!(vcpu.get_tsc_khz().unwrap() > 0);
3035        }
3036    }
3037
3038    #[cfg(target_arch = "x86_64")]
3039    #[test]
3040    fn test_set_tsc_khz() {
3041        let kvm = Kvm::new().unwrap();
3042        let vm = kvm.create_vm().unwrap();
3043        let vcpu = vm.create_vcpu(0).unwrap();
3044        let freq = vcpu.get_tsc_khz().unwrap();
3045
3046        if !(kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl)) {
3047            vcpu.set_tsc_khz(0).unwrap_err();
3048        } else {
3049            vcpu.set_tsc_khz(freq - 500000).unwrap();
3050            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq - 500000);
3051            vcpu.set_tsc_khz(freq + 500000).unwrap();
3052            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq + 500000);
3053        }
3054    }
3055
3056    #[cfg(target_arch = "x86_64")]
3057    #[test]
3058    fn test_sync_regs() {
3059        let kvm = Kvm::new().unwrap();
3060        let vm = kvm.create_vm().unwrap();
3061        let mut vcpu = vm.create_vcpu(0).unwrap();
3062
3063        // Test setting each valid register
3064        let sync_regs = [
3065            SyncReg::Register,
3066            SyncReg::SystemRegister,
3067            SyncReg::VcpuEvents,
3068        ];
3069        for reg in &sync_regs {
3070            vcpu.set_sync_valid_reg(*reg);
3071            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, *reg as u64);
3072            vcpu.clear_sync_valid_reg(*reg);
3073            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, 0);
3074        }
3075
3076        // Test that multiple valid SyncRegs can be set at the same time
3077        vcpu.set_sync_valid_reg(SyncReg::Register);
3078        vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
3079        vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
3080        assert_eq!(
3081            vcpu.kvm_run_ptr.as_ref().kvm_valid_regs,
3082            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
3083        );
3084
3085        // Test setting each dirty register
3086        let sync_regs = [
3087            SyncReg::Register,
3088            SyncReg::SystemRegister,
3089            SyncReg::VcpuEvents,
3090        ];
3091
3092        for reg in &sync_regs {
3093            vcpu.set_sync_dirty_reg(*reg);
3094            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, *reg as u64);
3095            vcpu.clear_sync_dirty_reg(*reg);
3096            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, 0);
3097        }
3098
3099        // Test that multiple dirty SyncRegs can be set at the same time
3100        vcpu.set_sync_dirty_reg(SyncReg::Register);
3101        vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
3102        vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);
3103        assert_eq!(
3104            vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs,
3105            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
3106        );
3107    }
3108
3109    #[cfg(target_arch = "x86_64")]
3110    #[test]
3111    fn test_sync_regs_with_run() {
3112        use std::io::Write;
3113
3114        let kvm = Kvm::new().unwrap();
3115        let vm = kvm.create_vm().unwrap();
3116        if kvm.check_extension(Cap::SyncRegs) {
3117            // This example is based on https://lwn.net/Articles/658511/
3118            #[rustfmt::skip]
3119            let code = [
3120                0xff, 0xc0, /* inc eax */
3121                0xf4, /* hlt */
3122            ];
3123
3124            let mem_size = 0x4000;
3125            let load_addr = mmap_anonymous(mem_size).as_ptr();
3126            let guest_addr: u64 = 0x1000;
3127            let slot: u32 = 0;
3128            let mem_region = kvm_userspace_memory_region {
3129                slot,
3130                guest_phys_addr: guest_addr,
3131                memory_size: mem_size as u64,
3132                userspace_addr: load_addr as u64,
3133                flags: KVM_MEM_LOG_DIRTY_PAGES,
3134            };
3135            unsafe {
3136                vm.set_user_memory_region(mem_region).unwrap();
3137            }
3138
3139            unsafe {
3140                // Get a mutable slice of `mem_size` from `load_addr`.
3141                // This is safe because we mapped it before.
3142                let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3143                slice.write_all(&code).unwrap();
3144            }
3145
3146            let mut vcpu = vm.create_vcpu(0).unwrap();
3147
3148            let orig_sregs = vcpu.get_sregs().unwrap();
3149
3150            let sync_regs = vcpu.sync_regs_mut();
3151
3152            // Initialize the sregs in sync_regs to be the original sregs
3153            sync_regs.sregs = orig_sregs;
3154            sync_regs.sregs.cs.base = 0;
3155            sync_regs.sregs.cs.selector = 0;
3156
3157            // Set up the guest to attempt to `inc rax`
3158            sync_regs.regs.rip = guest_addr;
3159            sync_regs.regs.rax = 0x8000;
3160            sync_regs.regs.rflags = 2;
3161
3162            // Initialize the sync_reg flags
3163            vcpu.set_sync_valid_reg(SyncReg::Register);
3164            vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
3165            vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
3166            vcpu.set_sync_dirty_reg(SyncReg::Register);
3167            vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
3168            vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);
3169
3170            // hlt is the only expected return from guest execution
3171            assert!(matches!(vcpu.run().expect("run failed"), VcpuExit::Hlt));
3172
3173            let regs = vcpu.get_regs().unwrap();
3174
3175            let sync_regs = vcpu.sync_regs();
3176            assert_eq!(regs, sync_regs.regs);
3177            assert_eq!(sync_regs.regs.rax, 0x8001);
3178        }
3179    }
3180
3181    #[test]
3182    #[cfg(target_arch = "x86_64")]
3183    fn test_translate_gva() {
3184        let kvm = Kvm::new().unwrap();
3185        let vm = kvm.create_vm().unwrap();
3186        let vcpu = vm.create_vcpu(0).unwrap();
3187        vcpu.translate_gva(0x10000).unwrap();
3188        assert_eq!(vcpu.translate_gva(0x10000).unwrap().valid, 1);
3189        assert_eq!(
3190            vcpu.translate_gva(0x10000).unwrap().physical_address,
3191            0x10000
3192        );
3193        vcpu.translate_gva(u64::MAX).unwrap();
3194        assert_eq!(vcpu.translate_gva(u64::MAX).unwrap().valid, 0);
3195    }
3196
3197    #[test]
3198    #[cfg(target_arch = "aarch64")]
3199    fn test_vcpu_attr() {
3200        let kvm = Kvm::new().unwrap();
3201        let vm = kvm.create_vm().unwrap();
3202        let vcpu = vm.create_vcpu(0).unwrap();
3203
3204        let dist_attr = kvm_device_attr {
3205            group: KVM_ARM_VCPU_PMU_V3_CTRL,
3206            attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
3207            addr: 0x0,
3208            flags: 0,
3209        };
3210
3211        vcpu.has_device_attr(&dist_attr).unwrap_err();
3212        vcpu.set_device_attr(&dist_attr).unwrap_err();
3213        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
3214        vm.get_preferred_target(&mut kvi)
3215            .expect("Cannot get preferred target");
3216        kvi.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2) | (1 << KVM_ARM_VCPU_PMU_V3);
3217        vcpu.vcpu_init(&kvi).unwrap();
3218        vcpu.has_device_attr(&dist_attr).unwrap();
3219        vcpu.set_device_attr(&dist_attr).unwrap();
3220    }
3221
3222    #[test]
3223    #[cfg(target_arch = "aarch64")]
3224    fn test_pointer_authentication() {
3225        let kvm = Kvm::new().unwrap();
3226        let vm = kvm.create_vm().unwrap();
3227        let vcpu = vm.create_vcpu(0).unwrap();
3228
3229        let mut kvi = kvm_vcpu_init::default();
3230        vm.get_preferred_target(&mut kvi)
3231            .expect("Cannot get preferred target");
3232        if kvm.check_extension(Cap::ArmPtrAuthAddress) {
3233            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
3234        }
3235        if kvm.check_extension(Cap::ArmPtrAuthGeneric) {
3236            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
3237        }
3238        vcpu.vcpu_init(&kvi).unwrap();
3239    }
3240
3241    #[cfg(target_arch = "x86_64")]
3242    #[test]
3243    fn test_userspace_rdmsr_exit() {
3244        use std::io::Write;
3245
3246        let kvm = Kvm::new().unwrap();
3247        let vm = kvm.create_vm().unwrap();
3248        #[rustfmt::skip]
3249        let code = [
3250            0x0F, 0x32, /* rdmsr */
3251            0xF4        /* hlt */
3252        ];
3253
3254        if !vm.check_extension(Cap::X86UserSpaceMsr) {
3255            return;
3256        }
3257        let cap = kvm_enable_cap {
3258            cap: Cap::X86UserSpaceMsr as u32,
3259            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
3260            ..Default::default()
3261        };
3262        vm.enable_cap(&cap).unwrap();
3263
3264        let mem_size = 0x4000;
3265        let load_addr = mmap_anonymous(mem_size).as_ptr();
3266        let guest_addr: u64 = 0x1000;
3267        let slot: u32 = 0;
3268        let mem_region = kvm_userspace_memory_region {
3269            slot,
3270            guest_phys_addr: guest_addr,
3271            memory_size: mem_size as u64,
3272            userspace_addr: load_addr as u64,
3273            flags: 0,
3274        };
3275        unsafe {
3276            vm.set_user_memory_region(mem_region).unwrap();
3277
3278            // Get a mutable slice of `mem_size` from `load_addr`.
3279            // This is safe because we mapped it before.
3280            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3281            slice.write_all(&code).unwrap();
3282        }
3283
3284        let mut vcpu = vm.create_vcpu(0).unwrap();
3285
3286        // Set up special registers
3287        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
3288        assert_ne!(vcpu_sregs.cs.base, 0);
3289        assert_ne!(vcpu_sregs.cs.selector, 0);
3290        vcpu_sregs.cs.base = 0;
3291        vcpu_sregs.cs.selector = 0;
3292        vcpu.set_sregs(&vcpu_sregs).unwrap();
3293
3294        // Set the Instruction Pointer to the guest address where we loaded
3295        // the code, and RCX to the MSR to be read.
3296        let mut vcpu_regs = vcpu.get_regs().unwrap();
3297        vcpu_regs.rip = guest_addr;
3298        vcpu_regs.rcx = 0x474f4f00;
3299        vcpu.set_regs(&vcpu_regs).unwrap();
3300
3301        match vcpu.run().unwrap() {
3302            VcpuExit::X86Rdmsr(exit) => {
3303                assert_eq!(exit.reason, MsrExitReason::Unknown);
3304                assert_eq!(exit.index, 0x474f4f00);
3305            }
3306            e => panic!("Unexpected exit: {:?}", e),
3307        }
3308    }
3309
3310    #[cfg(target_arch = "x86_64")]
3311    #[test]
3312    fn test_userspace_hypercall_exit() {
3313        use std::io::Write;
3314
3315        let kvm = Kvm::new().unwrap();
3316        let vm = kvm.create_vm().unwrap();
3317
3318        // Use `vmcall` or `vmmcall` depending on what's supported.
3319        let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
3320        let supports_vmcall = cpuid
3321            .as_slice()
3322            .iter()
3323            .find(|entry| entry.function == 1)
3324            .is_some_and(|entry| entry.ecx & (1 << 5) != 0);
3325        let supports_vmmcall = cpuid
3326            .as_slice()
3327            .iter()
3328            .find(|entry| entry.function == 0x8000_0001)
3329            .is_some_and(|entry| entry.ecx & (1 << 2) != 0);
3330        #[rustfmt::skip]
3331        let code = if supports_vmcall {
3332            [
3333                0x0F, 0x01, 0xC1, /* vmcall */
3334                0xF4              /* hlt */
3335            ]
3336        } else if supports_vmmcall {
3337            [
3338                0x0F, 0x01, 0xD9, /* vmmcall */
3339                0xF4              /* hlt */
3340            ]
3341        } else {
3342            return;
3343        };
3344
3345        if !vm.check_extension(Cap::ExitHypercall) {
3346            return;
3347        }
3348        const KVM_HC_MAP_GPA_RANGE: u64 = 12;
3349        let cap = kvm_enable_cap {
3350            cap: Cap::ExitHypercall as u32,
3351            args: [1 << KVM_HC_MAP_GPA_RANGE, 0, 0, 0],
3352            ..Default::default()
3353        };
3354        vm.enable_cap(&cap).unwrap();
3355
3356        let mem_size = 0x4000;
3357        let load_addr = mmap_anonymous(mem_size).as_ptr();
3358        let guest_addr: u64 = 0x1000;
3359        let slot: u32 = 0;
3360        let mem_region = kvm_userspace_memory_region {
3361            slot,
3362            guest_phys_addr: guest_addr,
3363            memory_size: mem_size as u64,
3364            userspace_addr: load_addr as u64,
3365            flags: 0,
3366        };
3367        unsafe {
3368            vm.set_user_memory_region(mem_region).unwrap();
3369
3370            // Get a mutable slice of `mem_size` from `load_addr`.
3371            // This is safe because we mapped it before.
3372            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3373            slice.write_all(&code).unwrap();
3374        }
3375
3376        let mut vcpu = vm.create_vcpu(0).unwrap();
3377
3378        // Set up special registers
3379        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
3380        assert_ne!(vcpu_sregs.cs.base, 0);
3381        assert_ne!(vcpu_sregs.cs.selector, 0);
3382        vcpu_sregs.cs.base = 0;
3383        vcpu_sregs.cs.selector = 0;
3384        vcpu.set_sregs(&vcpu_sregs).unwrap();
3385
3386        // Set the Instruction Pointer to the guest address where we loaded
3387        // the code, and RCX to the MSR to be read.
3388        let mut vcpu_regs = vcpu.get_regs().unwrap();
3389        vcpu_regs.rip = guest_addr;
3390        vcpu_regs.rax = KVM_HC_MAP_GPA_RANGE;
3391        vcpu_regs.rbx = 0x1234000;
3392        vcpu_regs.rcx = 1;
3393        vcpu_regs.rdx = 0;
3394        vcpu.set_regs(&vcpu_regs).unwrap();
3395
3396        match vcpu.run().unwrap() {
3397            VcpuExit::Hypercall(exit) => {
3398                assert_eq!(exit.nr, KVM_HC_MAP_GPA_RANGE);
3399                assert_eq!(exit.args[0], 0x1234000);
3400                assert_eq!(exit.args[1], 1);
3401                assert_eq!(exit.args[2], 0);
3402            }
3403            e => panic!("Unexpected exit: {:?}", e),
3404        }
3405    }
3406
3407    #[cfg(target_arch = "x86_64")]
3408    #[test]
3409    fn test_userspace_wrmsr_exit() {
3410        use std::io::Write;
3411
3412        let kvm = Kvm::new().unwrap();
3413        let vm = kvm.create_vm().unwrap();
3414        #[rustfmt::skip]
3415        let code = [
3416            0x0F, 0x30, /* wrmsr */
3417            0xF4        /* hlt */
3418        ];
3419
3420        if !vm.check_extension(Cap::X86UserSpaceMsr) {
3421            return;
3422        }
3423        let cap = kvm_enable_cap {
3424            cap: Cap::X86UserSpaceMsr as u32,
3425            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
3426            ..Default::default()
3427        };
3428        vm.enable_cap(&cap).unwrap();
3429
3430        let mem_size = 0x4000;
3431        let load_addr = mmap_anonymous(mem_size).as_ptr();
3432        let guest_addr: u64 = 0x1000;
3433        let slot: u32 = 0;
3434        let mem_region = kvm_userspace_memory_region {
3435            slot,
3436            guest_phys_addr: guest_addr,
3437            memory_size: mem_size as u64,
3438            userspace_addr: load_addr as u64,
3439            flags: 0,
3440        };
3441        unsafe {
3442            vm.set_user_memory_region(mem_region).unwrap();
3443
3444            // Get a mutable slice of `mem_size` from `load_addr`.
3445            // This is safe because we mapped it before.
3446            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3447            slice.write_all(&code).unwrap();
3448        }
3449
3450        let mut vcpu = vm.create_vcpu(0).unwrap();
3451
3452        // Set up special registers
3453        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
3454        assert_ne!(vcpu_sregs.cs.base, 0);
3455        assert_ne!(vcpu_sregs.cs.selector, 0);
3456        vcpu_sregs.cs.base = 0;
3457        vcpu_sregs.cs.selector = 0;
3458        vcpu.set_sregs(&vcpu_sregs).unwrap();
3459
3460        // Set the Instruction Pointer to the guest address where we loaded
3461        // the code, RCX to the MSR to be written, and EDX:EAX to the data to
3462        // be written.
3463        let mut vcpu_regs = vcpu.get_regs().unwrap();
3464        vcpu_regs.rip = guest_addr;
3465        vcpu_regs.rcx = 0x474f4f00;
3466        vcpu_regs.rax = 0xdeadbeef;
3467        vcpu_regs.rdx = 0xd0c0ffee;
3468        vcpu.set_regs(&vcpu_regs).unwrap();
3469
3470        match vcpu.run().unwrap() {
3471            VcpuExit::X86Wrmsr(exit) => {
3472                assert_eq!(exit.reason, MsrExitReason::Unknown);
3473                assert_eq!(exit.index, 0x474f4f00);
3474                assert_eq!(exit.data & 0xffffffff, 0xdeadbeef);
3475                assert_eq!((exit.data >> 32) & 0xffffffff, 0xd0c0ffee);
3476            }
3477            e => panic!("Unexpected exit: {:?}", e),
3478        }
3479    }
3480
3481    #[test]
3482    #[cfg(target_arch = "x86_64")]
3483    fn test_coalesced_pio() {
3484        use crate::IoEventAddress;
3485        use std::io::Write;
3486
3487        const PORT: u64 = 0x2c;
3488        const DATA: u64 = 0x39;
3489        const SIZE: u32 = 1;
3490
3491        #[rustfmt::skip]
3492        let code = [
3493            0xe6, 0x2c,   // out 0x2c, al
3494            0xf4,         // hlt
3495            0xe6, 0x2c,   // out 0x2c, al
3496            0xf4,         // hlt
3497        ];
3498
3499        let kvm = Kvm::new().unwrap();
3500        let vm = kvm.create_vm().unwrap();
3501        assert!(vm.check_extension(Cap::CoalescedPio));
3502
3503        // Prepare guest memory
3504        let mem_size = 0x4000;
3505        let load_addr = mmap_anonymous(mem_size).as_ptr();
3506        let guest_addr: u64 = 0x1000;
3507        let slot = 0;
3508        let mem_region = kvm_userspace_memory_region {
3509            slot,
3510            guest_phys_addr: guest_addr,
3511            memory_size: mem_size as u64,
3512            userspace_addr: load_addr as u64,
3513            flags: 0,
3514        };
3515
3516        unsafe {
3517            vm.set_user_memory_region(mem_region).unwrap();
3518
3519            // Get a mutable slice of `mem_size` from `load_addr`.
3520            // This is safe because we mapped it before.
3521            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3522            slice.write_all(&code).unwrap();
3523        }
3524
3525        let addr = IoEventAddress::Pio(PORT);
3526        vm.register_coalesced_mmio(addr, SIZE).unwrap();
3527
3528        let mut vcpu = vm.create_vcpu(0).unwrap();
3529
3530        // Map the MMIO ring
3531        vcpu.map_coalesced_mmio_ring().unwrap();
3532
3533        // Set regs
3534        let mut regs = vcpu.get_regs().unwrap();
3535        regs.rip = guest_addr;
3536        regs.rax = DATA;
3537        regs.rflags = 2;
3538        vcpu.set_regs(&regs).unwrap();
3539
3540        // Set sregs
3541        let mut sregs = vcpu.get_sregs().unwrap();
3542        sregs.cs.base = 0;
3543        sregs.cs.selector = 0;
3544        vcpu.set_sregs(&sregs).unwrap();
3545
3546        // Run and check that the exit was caused by the hlt and not the port
3547        // I/O
3548        let exit = vcpu.run().unwrap();
3549        assert!(matches!(exit, VcpuExit::Hlt));
3550
3551        // Check that the ring buffer entry is what we expect
3552        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
3553        assert_eq!(entry.phys_addr, PORT);
3554        assert_eq!(entry.len, 1);
3555        assert_eq!(entry.data[0] as u64, DATA);
3556        // SAFETY: this field is a u32 in all variants of the union,
3557        // so access is always safe.
3558        let pio = unsafe { entry.__bindgen_anon_1.pio };
3559        assert_eq!(pio, 1);
3560
3561        // The ring buffer should be empty now
3562        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());
3563
3564        // Unregister and check that the next PIO write triggers an exit
3565        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
3566        let exit = vcpu.run().unwrap();
3567        let VcpuExit::IoOut(port, data) = exit else {
3568            panic!("Unexpected VM exit: {:?}", exit);
3569        };
3570        assert_eq!(port, PORT as u16);
3571        assert_eq!(data, (DATA as u8).to_le_bytes());
3572    }
3573
3574    #[test]
3575    #[cfg(target_arch = "x86_64")]
3576    fn test_coalesced_mmio() {
3577        use crate::IoEventAddress;
3578        use std::io::Write;
3579
3580        const ADDR: u64 = 0x124;
3581        const DATA: u64 = 0x39;
3582        const SIZE: u32 = 2;
3583
3584        #[rustfmt::skip]
3585        let code = [
3586            0x66, 0x31, 0xFF,        // xor di,di
3587            0x66, 0xBF, 0x24, 0x01,  // mov di, 0x124
3588            0x67, 0x66, 0x89, 0x05,  // mov WORD PTR [di], ax
3589            0xF4,                    // hlt
3590            0x66, 0x31, 0xFF,        // xor di,di
3591            0x66, 0xBF, 0x24, 0x01,  // mov di, 0x124
3592            0x67, 0x66, 0x89, 0x05,  // mov WORD PTR [di], ax
3593            0xF4,                    // hlt
3594        ];
3595
3596        let kvm = Kvm::new().unwrap();
3597        let vm = kvm.create_vm().unwrap();
3598        assert!(vm.check_extension(Cap::CoalescedMmio));
3599
3600        // Prepare guest memory
3601        let mem_size = 0x4000;
3602        let load_addr = mmap_anonymous(mem_size).as_ptr();
3603        let guest_addr: u64 = 0x1000;
3604        let slot: u32 = 0;
3605        let mem_region = kvm_userspace_memory_region {
3606            slot,
3607            guest_phys_addr: guest_addr,
3608            memory_size: mem_size as u64,
3609            userspace_addr: load_addr as u64,
3610            flags: 0,
3611        };
3612
3613        unsafe {
3614            vm.set_user_memory_region(mem_region).unwrap();
3615
3616            // Get a mutable slice of `mem_size` from `load_addr`.
3617            // This is safe because we mapped it before.
3618            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
3619            slice.write_all(&code).unwrap();
3620        }
3621
3622        let addr = IoEventAddress::Mmio(ADDR);
3623        vm.register_coalesced_mmio(addr, SIZE).unwrap();
3624
3625        let mut vcpu = vm.create_vcpu(0).unwrap();
3626
3627        // Map the MMIO ring
3628        vcpu.map_coalesced_mmio_ring().unwrap();
3629
3630        // Set regs
3631        let mut regs = vcpu.get_regs().unwrap();
3632        regs.rip = guest_addr;
3633        regs.rax = DATA;
3634        regs.rdx = ADDR;
3635        regs.rflags = 2;
3636        vcpu.set_regs(&regs).unwrap();
3637
3638        // Set sregs
3639        let mut sregs = vcpu.get_sregs().unwrap();
3640        sregs.cs.base = 0;
3641        sregs.cs.selector = 0;
3642        vcpu.set_sregs(&sregs).unwrap();
3643
3644        // Run and check that the exit was caused by the hlt and not the MMIO
3645        // access
3646        let exit = vcpu.run().unwrap();
3647        assert!(matches!(exit, VcpuExit::Hlt));
3648
3649        // Check that the ring buffer entry is what we expect
3650        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
3651        assert_eq!(entry.phys_addr, ADDR);
3652        assert_eq!(entry.len, SIZE);
3653        assert_eq!(entry.data[0] as u64, DATA);
3654        // SAFETY: this field is a u32 in all variants of the union,
3655        // so access is always safe.
3656        let pio = unsafe { entry.__bindgen_anon_1.pio };
3657        assert_eq!(pio, 0);
3658
3659        // The ring buffer should be empty now
3660        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());
3661
3662        // Unregister and check that the next MMIO write triggers an exit
3663        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
3664        let exit = vcpu.run().unwrap();
3665        let VcpuExit::MmioWrite(addr, data) = exit else {
3666            panic!("Unexpected VM exit: {:?}", exit);
3667        };
3668        assert_eq!(addr, ADDR);
3669        assert_eq!(data, (DATA as u16).to_le_bytes());
3670    }
3671
3672    #[test]
3673    #[cfg(target_arch = "x86_64")]
3674    fn test_get_and_set_nested_state() {
3675        let kvm = Kvm::new().unwrap();
3676        let vm = kvm.create_vm().unwrap();
3677        let vcpu = vm.create_vcpu(0).unwrap();
3678
3679        // Ensure that KVM also during runtime never wants more memory than we have pre-allocated
3680        // by the helper type. KVM is expected to report:
3681        // - 128+4096==4224 on SVM
3682        // - 128+8192==8320 on VMX
3683        let kvm_nested_state_size = kvm.check_extension_int(Cap::NestedState) as usize;
3684        assert!(kvm_nested_state_size <= size_of::<KvmNestedStateBuffer>());
3685
3686        let mut state_buffer = KvmNestedStateBuffer::default();
3687        // Ensure that header shows full buffer length.
3688        assert_eq!(
3689            state_buffer.size as usize,
3690            size_of::<KvmNestedStateBuffer>()
3691        );
3692
3693        vcpu.nested_state(&mut state_buffer).unwrap();
3694        let old_state = state_buffer;
3695
3696        // There is no nested guest in this test, so there is no payload.
3697        assert_eq!(state_buffer.size as usize, size_of::<kvm_nested_state>());
3698
3699        vcpu.set_nested_state(&old_state).unwrap();
3700    }
3701}