kvm_ioctls/ioctls/
vcpu.rs

1// Copyright © 2024 Institute of Software, CAS. All rights reserved.
2//
3// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4// SPDX-License-Identifier: Apache-2.0 OR MIT
5//
6// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the THIRD-PARTY file.
9
10// Part of public API
11#[cfg(target_arch = "x86_64")]
12pub use kvm_bindings::nested::KvmNestedStateBuffer;
13
14use kvm_bindings::*;
15use libc::EINVAL;
16use std::fs::File;
17use std::os::unix::io::{AsRawFd, RawFd};
18
19use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result};
20use crate::kvm_ioctls::*;
21use vmm_sys_util::errno;
22use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref};
23#[cfg(target_arch = "x86_64")]
24use {
25    std::num::NonZeroUsize,
26    vmm_sys_util::ioctl::{ioctl_with_mut_ptr, ioctl_with_ptr, ioctl_with_val},
27};
28
/// Helper to compute the size in bytes of a register from the size field
/// encoded in its id.
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
pub fn reg_size(reg_id: u64) -> usize {
    // The id encodes log2(size); a left shift of 1 is equivalent to 2^exp.
    let size_exp = ((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT) as u32;
    1_usize << size_exp
}
34
/// Information about a [`VcpuExit`] triggered by a hypercall (`KVM_EXIT_HYPERCALL`).
#[derive(Debug)]
pub struct HypercallExit<'a> {
    /// The hypercall number.
    pub nr: u64,
    /// The arguments for the hypercall.
    pub args: [u64; 6],
    /// The return code to be indicated to the guest.
    ///
    /// To be filled in by the user before the vCPU is run again.
    pub ret: &'a mut u64,
    /// Whether the hypercall was executed in long mode.
    pub longmode: u32,
}
47
/// Information about a [`VcpuExit`] triggered by an MSR read (`KVM_EXIT_X86_RDMSR`).
#[derive(Debug)]
pub struct ReadMsrExit<'a> {
    /// Must be set to 1 by the user if the read access should fail. This
    /// will inject a #GP fault into the guest when the VCPU is executed
    /// again.
    pub error: &'a mut u8,
    /// The reason for this exit.
    pub reason: MsrExitReason,
    /// The MSR the guest wants to read.
    pub index: u32,
    /// The data to be supplied by the user as the MSR Contents to the guest.
    pub data: &'a mut u64,
}
62
/// Information about a [`VcpuExit`] triggered by an MSR write (`KVM_EXIT_X86_WRMSR`).
#[derive(Debug)]
pub struct WriteMsrExit<'a> {
    /// Must be set to 1 by the user if the write access should fail. This
    /// will inject a #GP fault into the guest when the VCPU is executed
    /// again.
    pub error: &'a mut u8,
    /// The reason for this exit.
    pub reason: MsrExitReason,
    /// The MSR the guest wants to write.
    pub index: u32,
    /// The data the guest wants to write into the MSR.
    pub data: u64,
}
77
bitflags::bitflags! {
    /// The reason for a [`VcpuExit::X86Rdmsr`] or [`VcpuExit::X86Wrmsr`]. This
    /// is also used when enabling
    /// [`Cap::X86UserSpaceMsr`](crate::Cap::X86UserSpaceMsr) to specify which
    /// reasons should be forwarded to the user via those exits.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct MsrExitReason: u32 {
        /// Corresponds to [`KVM_MSR_EXIT_REASON_UNKNOWN`]. The exit was
        /// triggered by an access to an MSR that is unknown to KVM.
        const Unknown = KVM_MSR_EXIT_REASON_UNKNOWN;
        /// Corresponds to [`KVM_MSR_EXIT_REASON_INVAL`]. The exit was
        /// triggered by an access to an invalid MSR or to reserved bits.
        const Inval = KVM_MSR_EXIT_REASON_INVAL;
        /// Corresponds to [`KVM_MSR_EXIT_REASON_FILTER`]. The exit was
        /// triggered by an access to a filtered MSR.
        const Filter = KVM_MSR_EXIT_REASON_FILTER;
    }
}
96
/// Reasons for vCPU exits.
///
/// The exit reasons are mapped to the `KVM_EXIT_*` defines in the
/// [Linux KVM header](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h).
#[derive(Debug)]
pub enum VcpuExit<'a> {
    /// An out port instruction was run on the given port with the given data.
    IoOut(u16 /* port */, &'a [u8] /* data */),
    /// An in port instruction was run on the given port.
    ///
    /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run)
    /// is called again.
    IoIn(u16 /* port */, &'a mut [u8] /* data */),
    /// A read instruction was run against the given MMIO address.
    ///
    /// The given slice should be filled in before [run()](struct.VcpuFd.html#method.run)
    /// is called again.
    MmioRead(u64 /* address */, &'a mut [u8]),
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite(u64 /* address */, &'a [u8]),
    /// Corresponds to KVM_EXIT_UNKNOWN.
    Unknown,
    /// Corresponds to KVM_EXIT_EXCEPTION.
    Exception,
    /// Corresponds to KVM_EXIT_HYPERCALL.
    Hypercall(HypercallExit<'a>),
    /// Corresponds to KVM_EXIT_DEBUG.
    ///
    /// Provides architecture specific information for the debug event.
    Debug(kvm_debug_exit_arch),
    /// Corresponds to KVM_EXIT_HLT.
    Hlt,
    /// Corresponds to KVM_EXIT_IRQ_WINDOW_OPEN.
    IrqWindowOpen,
    /// Corresponds to KVM_EXIT_SHUTDOWN.
    Shutdown,
    /// Corresponds to KVM_EXIT_FAIL_ENTRY.
    FailEntry(
        u64, /* hardware_entry_failure_reason */
        u32, /* cpu */
    ),
    /// Corresponds to KVM_EXIT_INTR.
    Intr,
    /// Corresponds to KVM_EXIT_SET_TPR.
    SetTpr,
    /// Corresponds to KVM_EXIT_TPR_ACCESS.
    TprAccess,
    /// Corresponds to KVM_EXIT_S390_SIEIC.
    S390Sieic,
    /// Corresponds to KVM_EXIT_S390_RESET.
    S390Reset,
    /// Corresponds to KVM_EXIT_DCR.
    Dcr,
    /// Corresponds to KVM_EXIT_NMI.
    Nmi,
    /// Corresponds to KVM_EXIT_INTERNAL_ERROR.
    InternalError,
    /// Corresponds to KVM_EXIT_OSI.
    Osi,
    /// Corresponds to KVM_EXIT_PAPR_HCALL.
    PaprHcall,
    /// Corresponds to KVM_EXIT_S390_UCONTROL.
    S390Ucontrol,
    /// Corresponds to KVM_EXIT_WATCHDOG.
    Watchdog,
    /// Corresponds to KVM_EXIT_S390_TSCH.
    S390Tsch,
    /// Corresponds to KVM_EXIT_EPR.
    Epr,
    /// Corresponds to KVM_EXIT_SYSTEM_EVENT.
    SystemEvent(u32 /* type */, &'a [u64] /* data */),
    /// Corresponds to KVM_EXIT_S390_STSI.
    S390Stsi,
    /// Corresponds to KVM_EXIT_IOAPIC_EOI.
    IoapicEoi(u8 /* vector */),
    /// Corresponds to KVM_EXIT_HYPERV.
    Hyperv,
    /// Corresponds to KVM_EXIT_X86_RDMSR.
    X86Rdmsr(ReadMsrExit<'a>),
    /// Corresponds to KVM_EXIT_X86_WRMSR.
    X86Wrmsr(WriteMsrExit<'a>),
    /// Corresponds to KVM_EXIT_MEMORY_FAULT.
    MemoryFault {
        /// Fault flags; see the KVM API documentation for `KVM_EXIT_MEMORY_FAULT`.
        flags: u64,
        /// The faulting guest physical address.
        gpa: u64,
        /// The size of the faulting memory range; see the KVM API documentation.
        size: u64,
    },
    /// Corresponds to an exit reason that is unknown from the current version
    /// of the kvm-ioctls crate. Let the consumer decide about what to do with
    /// it.
    Unsupported(u32),
}
192
/// Wrapper over KVM vCPU ioctls.
#[derive(Debug)]
pub struct VcpuFd {
    /// Handle to the vCPU file descriptor.
    vcpu: File,
    /// Wrapper over the shared `kvm_run` mapping; see [`KvmRunWrapper`].
    kvm_run_ptr: KvmRunWrapper,
    /// A pointer to the coalesced MMIO page
    coalesced_mmio_ring: Option<KvmCoalescedIoRing>,
}
201
/// KVM Sync Registers used to tell KVM which registers to sync
#[repr(u32)]
#[derive(Debug, Copy, Clone)]
#[cfg(target_arch = "x86_64")]
pub enum SyncReg {
    /// General purpose registers
    Register = KVM_SYNC_X86_REGS,

    /// System registers
    SystemRegister = KVM_SYNC_X86_SREGS,

    /// CPU events
    VcpuEvents = KVM_SYNC_X86_EVENTS,
}
216
217impl VcpuFd {
218    /// Returns the vCPU general purpose registers.
219    ///
220    /// The registers are returned in a `kvm_regs` structure as defined in the
221    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
222    /// See documentation for `KVM_GET_REGS`.
223    ///
224    /// # Example
225    ///
226    /// ```rust
227    /// # extern crate kvm_ioctls;
228    /// # use kvm_ioctls::Kvm;
229    /// let kvm = Kvm::new().unwrap();
230    /// let vm = kvm.create_vm().unwrap();
231    /// let vcpu = vm.create_vcpu(0).unwrap();
232    /// let regs = vcpu.get_regs().unwrap();
233    /// ```
234    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
235    pub fn get_regs(&self) -> Result<kvm_regs> {
236        let mut regs = kvm_regs::default();
237        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
238        // read the correct amount of memory from our pointer, and we verify the return result.
239        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS(), &mut regs) };
240        if ret != 0 {
241            return Err(errno::Error::last());
242        }
243        Ok(regs)
244    }
245
246    /// Sets a specified piece of cpu configuration and/or state.
247    ///
248    /// See the documentation for `KVM_SET_DEVICE_ATTR` in
249    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt)
250    /// # Arguments
251    ///
252    /// * `device_attr` - The cpu attribute to be set.
253    ///
254    /// # Example
255    ///
256    /// ```rust
257    /// # extern crate kvm_ioctls;
258    /// # extern crate kvm_bindings;
259    /// # use kvm_ioctls::Kvm;
260    /// # use kvm_bindings::{
261    ///    KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT
262    /// };
263    /// let kvm = Kvm::new().unwrap();
264    /// let vm = kvm.create_vm().unwrap();
265    /// let vcpu = vm.create_vcpu(0).unwrap();
266    ///
267    /// let dist_attr = kvm_bindings::kvm_device_attr {
268    ///     group: KVM_ARM_VCPU_PMU_V3_CTRL,
269    ///     attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
270    ///     addr: 0x0,
271    ///     flags: 0,
272    /// };
273    ///
274    /// if (vcpu.has_device_attr(&dist_attr).is_ok()) {
275    ///     vcpu.set_device_attr(&dist_attr).unwrap();
276    /// }
277    /// ```
278    #[cfg(target_arch = "aarch64")]
279    pub fn set_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> {
280        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
281        let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEVICE_ATTR(), device_attr) };
282        if ret != 0 {
283            return Err(errno::Error::last());
284        }
285        Ok(())
286    }
287
288    /// Tests whether a cpu supports a particular attribute.
289    ///
290    /// See the documentation for `KVM_HAS_DEVICE_ATTR` in
291    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt)
292    /// # Arguments
293    ///
294    /// * `device_attr` - The cpu attribute to be tested. `addr` field is ignored.
295    ///
296    /// # Example
297    ///
298    /// ```rust
299    /// # extern crate kvm_ioctls;
300    /// # extern crate kvm_bindings;
301    /// # use kvm_ioctls::Kvm;
302    /// # use kvm_bindings::{
303    ///    KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT
304    /// };
305    /// let kvm = Kvm::new().unwrap();
306    /// let vm = kvm.create_vm().unwrap();
307    /// let vcpu = vm.create_vcpu(0).unwrap();
308    ///
309    /// let dist_attr = kvm_bindings::kvm_device_attr {
310    ///     group: KVM_ARM_VCPU_PMU_V3_CTRL,
311    ///     attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
312    ///     addr: 0x0,
313    ///     flags: 0,
314    /// };
315    ///
316    /// vcpu.has_device_attr(&dist_attr);
317    /// ```
318    #[cfg(target_arch = "aarch64")]
319    pub fn has_device_attr(&self, device_attr: &kvm_device_attr) -> Result<()> {
320        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
321        let ret = unsafe { ioctl_with_ref(self, KVM_HAS_DEVICE_ATTR(), device_attr) };
322        if ret != 0 {
323            return Err(errno::Error::last());
324        }
325        Ok(())
326    }
327
328    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
329    ///
330    /// # Arguments
331    ///
332    /// * `regs` - general purpose registers. For details check the `kvm_regs` structure in the
333    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
334    ///
335    /// # Example
336    ///
337    /// ```rust
338    /// # extern crate kvm_ioctls;
339    /// # use kvm_ioctls::Kvm;
340    /// let kvm = Kvm::new().unwrap();
341    /// let vm = kvm.create_vm().unwrap();
342    /// let vcpu = vm.create_vcpu(0).unwrap();
343    ///
344    /// // Get the current vCPU registers.
345    /// let mut regs = vcpu.get_regs().unwrap();
346    /// // Set a new value for the Instruction Pointer.
347    /// regs.rip = 0x100;
348    /// vcpu.set_regs(&regs).unwrap();
349    /// ```
350    #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
351    pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
352        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
353        // read the correct amount of memory from our pointer, and we verify the return result.
354        let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS(), regs) };
355        if ret != 0 {
356            return Err(errno::Error::last());
357        }
358        Ok(())
359    }
360
361    /// Returns the vCPU special registers.
362    ///
363    /// The registers are returned in a `kvm_sregs` structure as defined in the
364    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
365    /// See documentation for `KVM_GET_SREGS`.
366    ///
367    /// # Example
368    ///
369    /// ```rust
370    /// # extern crate kvm_ioctls;
371    /// # use kvm_ioctls::Kvm;
372    /// let kvm = Kvm::new().unwrap();
373    /// let vm = kvm.create_vm().unwrap();
374    /// let vcpu = vm.create_vcpu(0).unwrap();
375    /// let sregs = vcpu.get_sregs().unwrap();
376    /// ```
377    #[cfg(target_arch = "x86_64")]
378    pub fn get_sregs(&self) -> Result<kvm_sregs> {
379        let mut regs = kvm_sregs::default();
380        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
381        // write the correct amount of memory to our pointer, and we verify the return result.
382        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS(), &mut regs) };
383        if ret != 0 {
384            return Err(errno::Error::last());
385        }
386        Ok(regs)
387    }
388
389    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
390    ///
391    /// # Arguments
392    ///
393    /// * `sregs` - Special registers. For details check the `kvm_sregs` structure in the
394    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
395    ///
396    /// # Example
397    ///
398    /// ```rust
399    /// # extern crate kvm_ioctls;
400    /// # use kvm_ioctls::Kvm;
401    /// let kvm = Kvm::new().unwrap();
402    /// let vm = kvm.create_vm().unwrap();
403    /// let vcpu = vm.create_vcpu(0).unwrap();
404    ///
405    /// let mut sregs = vcpu.get_sregs().unwrap();
406    /// // Update the code segment (cs).
407    /// sregs.cs.base = 0;
408    /// sregs.cs.selector = 0;
409    /// vcpu.set_sregs(&sregs).unwrap();
410    /// ```
411    #[cfg(target_arch = "x86_64")]
412    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
413        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
414        // read the correct amount of memory from our pointer, and we verify the return result.
415        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS(), sregs) };
416        if ret != 0 {
417            return Err(errno::Error::last());
418        }
419        Ok(())
420    }
421
422    /// Returns the floating point state (FPU) from the vCPU.
423    ///
424    /// The state is returned in a `kvm_fpu` structure as defined in the
425    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
426    /// See the documentation for `KVM_GET_FPU`.
427    ///
428    /// # Example
429    ///
430    /// ```rust
431    /// # extern crate kvm_ioctls;
432    /// # use kvm_ioctls::Kvm;
433    /// let kvm = Kvm::new().unwrap();
434    /// let vm = kvm.create_vm().unwrap();
435    /// let vcpu = vm.create_vcpu(0).unwrap();
436    /// let fpu = vcpu.get_fpu().unwrap();
437    /// ```
438    #[cfg(target_arch = "x86_64")]
439    pub fn get_fpu(&self) -> Result<kvm_fpu> {
440        let mut fpu = kvm_fpu::default();
441        // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct.
442        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU(), &mut fpu) };
443        if ret != 0 {
444            return Err(errno::Error::last());
445        }
446        Ok(fpu)
447    }
448
449    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct.
450    ///
451    /// # Arguments
452    ///
453    /// * `fpu` - FPU configuration. For details check the `kvm_fpu` structure in the
454    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
455    ///
456    /// # Example
457    ///
458    /// ```rust
459    /// # extern crate kvm_ioctls;
460    /// # extern crate kvm_bindings;
461    /// # use kvm_ioctls::Kvm;
462    /// # use kvm_bindings::kvm_fpu;
463    /// let kvm = Kvm::new().unwrap();
464    /// let vm = kvm.create_vm().unwrap();
465    /// let vcpu = vm.create_vcpu(0).unwrap();
466    ///
467    /// let KVM_FPU_CWD: u16 = 0x37f;
468    /// let fpu = kvm_fpu {
469    ///     fcw: KVM_FPU_CWD,
470    ///     ..Default::default()
471    /// };
472    /// vcpu.set_fpu(&fpu).unwrap();
473    /// ```
474    #[cfg(target_arch = "x86_64")]
475    pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
476        // SAFETY: Here we trust the kernel not to read past the end of the kvm_fpu struct.
477        let ret = unsafe { ioctl_with_ref(self, KVM_SET_FPU(), fpu) };
478        if ret < 0 {
479            return Err(errno::Error::last());
480        }
481        Ok(())
482    }
483
484    /// X86 specific call to setup the CPUID registers.
485    ///
486    /// See the documentation for `KVM_SET_CPUID2`.
487    ///
488    /// # Arguments
489    ///
490    /// * `cpuid` - CPUID registers.
491    ///
492    /// # Example
493    ///
494    ///  ```rust
495    /// # extern crate kvm_ioctls;
496    /// # extern crate kvm_bindings;
497    /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES;
498    /// # use kvm_ioctls::Kvm;
499    /// let kvm = Kvm::new().unwrap();
500    /// let mut kvm_cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
501    /// let vm = kvm.create_vm().unwrap();
502    /// let vcpu = vm.create_vcpu(0).unwrap();
503    ///
504    /// // Update the CPUID entries to disable the EPB feature.
505    /// const ECX_EPB_SHIFT: u32 = 3;
506    /// let entries = kvm_cpuid.as_mut_slice();
507    /// for entry in entries.iter_mut() {
508    ///     match entry.function {
509    ///         6 => entry.ecx &= !(1 << ECX_EPB_SHIFT),
510    ///         _ => (),
511    ///     }
512    /// }
513    ///
514    /// vcpu.set_cpuid2(&kvm_cpuid).unwrap();
515    /// ```
516    ///
517    #[cfg(target_arch = "x86_64")]
518    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
519        // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
520        let ret = unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2(), cpuid.as_fam_struct_ptr()) };
521        if ret < 0 {
522            return Err(errno::Error::last());
523        }
524        Ok(())
525    }
526
527    /// X86 specific call to retrieve the CPUID registers.
528    ///
529    /// It requires knowledge of how many `kvm_cpuid_entry2` entries there are to get.
530    /// See the documentation for `KVM_GET_CPUID2` in the
531    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
532    ///
533    /// # Arguments
534    ///
535    /// * `num_entries` - Number of CPUID entries to be read.
536    ///
537    /// # Example
538    ///
539    ///  ```rust
540    /// # extern crate kvm_ioctls;
541    /// # extern crate kvm_bindings;
542    /// # use kvm_bindings::KVM_MAX_CPUID_ENTRIES;
543    /// # use kvm_ioctls::Kvm;
544    /// let kvm = Kvm::new().unwrap();
545    /// let vm = kvm.create_vm().unwrap();
546    /// let vcpu = vm.create_vcpu(0).unwrap();
547    /// let cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES).unwrap();
548    /// ```
549    ///
550    #[cfg(target_arch = "x86_64")]
551    pub fn get_cpuid2(&self, num_entries: usize) -> Result<CpuId> {
552        if num_entries > KVM_MAX_CPUID_ENTRIES {
553            // Returns the same error the underlying `ioctl` would have sent.
554            return Err(errno::Error::new(libc::ENOMEM));
555        }
556
557        let mut cpuid = CpuId::new(num_entries).map_err(|_| errno::Error::new(libc::ENOMEM))?;
558        let ret =
559            // SAFETY: Here we trust the kernel not to read past the end of the kvm_cpuid2 struct.
560            unsafe { ioctl_with_mut_ptr(self, KVM_GET_CPUID2(), cpuid.as_mut_fam_struct_ptr()) };
561        if ret != 0 {
562            return Err(errno::Error::last());
563        }
564        Ok(cpuid)
565    }
566
    /// Enables a vCPU capability.
    ///
    /// See the documentation for `KVM_ENABLE_CAP`.
    ///
    /// # Arguments
    ///
    /// * kvm_enable_cap - KVM capability structure. For details check the `kvm_enable_cap`
    ///   structure in the
    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
    ///
    /// # Example
    ///
    ///  ```rust
    /// # extern crate kvm_ioctls;
    /// # extern crate kvm_bindings;
    /// # use kvm_bindings::{kvm_enable_cap, KVM_MAX_CPUID_ENTRIES, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP};
    /// # use kvm_ioctls::{Kvm, Cap};
    /// let kvm = Kvm::new().unwrap();
    /// let vm = kvm.create_vm().unwrap();
    /// let mut cap: kvm_enable_cap = Default::default();
    /// // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled
    /// cap.cap = KVM_CAP_SPLIT_IRQCHIP;
    /// cap.args[0] = 24;
    /// vm.enable_cap(&cap).unwrap();
    ///
    /// let vcpu = vm.create_vcpu(0).unwrap();
    /// if kvm.check_extension(Cap::HypervSynic) {
    ///     let mut cap: kvm_enable_cap = Default::default();
    ///     cap.cap = KVM_CAP_HYPERV_SYNIC;
    ///     vcpu.enable_cap(&cap).unwrap();
    /// }
    /// ```
    ///
    #[cfg(target_arch = "x86_64")]
    pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // SAFETY: The ioctl is safe because we allocated the struct and we know the
        // kernel will write exactly the size of the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) };
        if ret == 0 {
            Ok(())
        } else {
            Err(errno::Error::last())
        }
    }
610
611    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
612    ///
613    /// The state is returned in a `kvm_lapic_state` structure as defined in the
614    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
615    /// See the documentation for `KVM_GET_LAPIC`.
616    ///
617    /// # Example
618    ///
619    /// ```rust
620    /// # extern crate kvm_ioctls;
621    /// # use kvm_ioctls::Kvm;
622    /// let kvm = Kvm::new().unwrap();
623    /// let vm = kvm.create_vm().unwrap();
624    /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU.
625    /// vm.create_irq_chip().unwrap();
626    /// let vcpu = vm.create_vcpu(0).unwrap();
627    /// let lapic = vcpu.get_lapic().unwrap();
628    /// ```
629    #[cfg(target_arch = "x86_64")]
630    pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
631        let mut klapic = kvm_lapic_state::default();
632
633        // SAFETY: The ioctl is unsafe unless you trust the kernel not to write past the end of the
634        // local_apic struct.
635        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC(), &mut klapic) };
636        if ret < 0 {
637            return Err(errno::Error::last());
638        }
639        Ok(klapic)
640    }
641
642    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
643    ///
644    /// See the documentation for `KVM_SET_LAPIC`.
645    ///
646    /// # Arguments
647    ///
648    /// * `klapic` - LAPIC state. For details check the `kvm_lapic_state` structure in the
649    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
650    /// # Example
651    ///
652    /// ```rust
653    /// # extern crate kvm_ioctls;
654    /// # use kvm_ioctls::Kvm;
655    /// use std::io::Write;
656    ///
657    /// let kvm = Kvm::new().unwrap();
658    /// let vm = kvm.create_vm().unwrap();
659    /// // For `get_lapic` to work, you first need to create a IRQ chip before creating the vCPU.
660    /// vm.create_irq_chip().unwrap();
661    /// let vcpu = vm.create_vcpu(0).unwrap();
662    /// let mut lapic = vcpu.get_lapic().unwrap();
663    ///
664    /// // Write to APIC_ICR offset the value 2.
665    /// let apic_icr_offset = 0x300;
666    /// let write_value: &[u8] = &[2, 0, 0, 0];
667    /// let mut apic_icr_slice =
668    ///     unsafe { &mut *(&mut lapic.regs[apic_icr_offset..] as *mut [i8] as *mut [u8]) };
669    /// apic_icr_slice.write(write_value).unwrap();
670    ///
671    /// // Update the value of LAPIC.
672    /// vcpu.set_lapic(&lapic).unwrap();
673    /// ```
674    #[cfg(target_arch = "x86_64")]
675    pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
676        // SAFETY: The ioctl is safe because the kernel will only read from the klapic struct.
677        let ret = unsafe { ioctl_with_ref(self, KVM_SET_LAPIC(), klapic) };
678        if ret < 0 {
679            return Err(errno::Error::last());
680        }
681        Ok(())
682    }
683
684    /// Returns the model-specific registers (MSR) for this vCPU.
685    ///
686    /// It emulates `KVM_GET_MSRS` ioctl's behavior by returning the number of MSRs
687    /// successfully read upon success or the last error number in case of failure.
688    /// The MSRs are returned in the `msr` method argument.
689    ///
690    /// # Arguments
691    ///
692    /// * `msrs`  - MSRs (input/output). For details check the `kvm_msrs` structure in the
693    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
694    ///
695    /// # Example
696    ///
697    /// ```rust
698    /// # extern crate kvm_ioctls;
699    /// # extern crate kvm_bindings;
700    /// # use kvm_ioctls::Kvm;
701    /// # use kvm_bindings::{kvm_msr_entry, Msrs};
702    /// let kvm = Kvm::new().unwrap();
703    /// let vm = kvm.create_vm().unwrap();
704    /// let vcpu = vm.create_vcpu(0).unwrap();
705    /// // Configure the struct to say which entries we want to get.
706    /// let mut msrs = Msrs::from_entries(&[
707    ///     kvm_msr_entry {
708    ///         index: 0x0000_0174,
709    ///         ..Default::default()
710    ///     },
711    ///     kvm_msr_entry {
712    ///         index: 0x0000_0175,
713    ///         ..Default::default()
714    ///     },
715    /// ])
716    /// .unwrap();
717    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
718    /// assert_eq!(read, 2);
719    /// ```
720    #[cfg(target_arch = "x86_64")]
721    pub fn get_msrs(&self, msrs: &mut Msrs) -> Result<usize> {
722        // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct.
723        let ret = unsafe { ioctl_with_mut_ptr(self, KVM_GET_MSRS(), msrs.as_mut_fam_struct_ptr()) };
724        if ret < 0 {
725            return Err(errno::Error::last());
726        }
727        Ok(ret as usize)
728    }
729
730    /// Setup the model-specific registers (MSR) for this vCPU.
731    /// Returns the number of MSR entries actually written.
732    ///
733    /// See the documentation for `KVM_SET_MSRS`.
734    ///
735    /// # Arguments
736    ///
737    /// * `msrs` - MSRs. For details check the `kvm_msrs` structure in the
738    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
739    /// # Example
740    ///
741    /// ```rust
742    /// # extern crate kvm_ioctls;
743    /// # extern crate kvm_bindings;
744    /// # use kvm_ioctls::Kvm;
745    /// # use kvm_bindings::{kvm_msr_entry, Msrs};
746    /// let kvm = Kvm::new().unwrap();
747    /// let vm = kvm.create_vm().unwrap();
748    /// let vcpu = vm.create_vcpu(0).unwrap();
749    ///
750    /// // Configure the entries we want to set.
751    /// let mut msrs = Msrs::from_entries(&[kvm_msr_entry {
752    ///     index: 0x0000_0174,
753    ///     ..Default::default()
754    /// }])
755    /// .unwrap();
756    /// let written = vcpu.set_msrs(&msrs).unwrap();
757    /// assert_eq!(written, 1);
758    /// ```
759    #[cfg(target_arch = "x86_64")]
760    pub fn set_msrs(&self, msrs: &Msrs) -> Result<usize> {
761        // SAFETY: Here we trust the kernel not to read past the end of the kvm_msrs struct.
762        let ret = unsafe { ioctl_with_ptr(self, KVM_SET_MSRS(), msrs.as_fam_struct_ptr()) };
763        // KVM_SET_MSRS actually returns the number of msr entries written.
764        if ret < 0 {
765            return Err(errno::Error::last());
766        }
767        Ok(ret as usize)
768    }
769
770    /// Returns the vcpu's current "multiprocessing state".
771    ///
772    /// See the documentation for `KVM_GET_MP_STATE` in the
773    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
774    ///
775    /// # Arguments
776    ///
777    /// * `kvm_mp_state` - multiprocessing state to be read.
778    ///
779    /// # Example
780    ///
781    /// ```rust
782    /// # extern crate kvm_ioctls;
783    /// # use kvm_ioctls::Kvm;
784    /// let kvm = Kvm::new().unwrap();
785    /// let vm = kvm.create_vm().unwrap();
786    /// let vcpu = vm.create_vcpu(0).unwrap();
787    /// let mp_state = vcpu.get_mp_state().unwrap();
788    /// ```
789    #[cfg(any(
790        target_arch = "x86_64",
791        target_arch = "aarch64",
792        target_arch = "riscv64",
793        target_arch = "s390x"
794    ))]
795    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
796        let mut mp_state = Default::default();
797        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
798        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE(), &mut mp_state) };
799        if ret != 0 {
800            return Err(errno::Error::last());
801        }
802        Ok(mp_state)
803    }
804
805    /// Sets the vcpu's current "multiprocessing state".
806    ///
807    /// See the documentation for `KVM_SET_MP_STATE` in the
808    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
809    ///
810    /// # Arguments
811    ///
812    /// * `kvm_mp_state` - multiprocessing state to be written.
813    ///
814    /// # Example
815    ///
816    /// ```rust
817    /// # extern crate kvm_ioctls;
818    /// # use kvm_ioctls::Kvm;
819    /// let kvm = Kvm::new().unwrap();
820    /// let vm = kvm.create_vm().unwrap();
821    /// let vcpu = vm.create_vcpu(0).unwrap();
822    /// let mp_state = Default::default();
823    /// // Your `mp_state` manipulation here.
824    /// vcpu.set_mp_state(mp_state).unwrap();
825    /// ```
826    #[cfg(any(
827        target_arch = "x86_64",
828        target_arch = "aarch64",
829        target_arch = "riscv64",
830        target_arch = "s390x"
831    ))]
832    pub fn set_mp_state(&self, mp_state: kvm_mp_state) -> Result<()> {
833        // SAFETY: Here we trust the kernel not to read past the end of the kvm_mp_state struct.
834        let ret = unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE(), &mp_state) };
835        if ret != 0 {
836            return Err(errno::Error::last());
837        }
838        Ok(())
839    }
840
841    /// X86 specific call that returns the vcpu's current "xsave struct".
842    ///
843    /// See the documentation for `KVM_GET_XSAVE` in the
844    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
845    ///
846    /// # Arguments
847    ///
848    /// * `kvm_xsave` - xsave struct to be read.
849    ///
850    /// # Example
851    ///
852    /// ```rust
853    /// # extern crate kvm_ioctls;
854    /// # use kvm_ioctls::Kvm;
855    /// let kvm = Kvm::new().unwrap();
856    /// let vm = kvm.create_vm().unwrap();
857    /// let vcpu = vm.create_vcpu(0).unwrap();
858    /// let xsave = vcpu.get_xsave().unwrap();
859    /// ```
860    #[cfg(target_arch = "x86_64")]
861    pub fn get_xsave(&self) -> Result<kvm_xsave> {
862        let mut xsave = Default::default();
863        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct.
864        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XSAVE(), &mut xsave) };
865        if ret != 0 {
866            return Err(errno::Error::last());
867        }
868        Ok(xsave)
869    }
870
871    /// X86 specific call that gets the current vcpu's "xsave struct" via `KVM_GET_XSAVE2`.
872    ///
873    /// See the documentation for `KVM_GET_XSAVE2` in the
874    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
875    ///
876    /// # Arguments
877    ///
878    /// * `xsave` - A mutable reference to an [`Xsave`] instance that will be populated with the
879    ///   current vcpu's "xsave struct".
880    ///
881    /// # Safety
882    ///
883    /// This function is unsafe because there is no guarantee `xsave` is allocated with enough space
884    /// to hold the entire xsave state.
885    ///
886    /// The required size in bytes can be retrieved via `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` and
887    /// can vary depending on features that have been dynamically enabled by `arch_prctl()`. Thus,
888    /// any features must not be enabled dynamically after the required size has been confirmed.
889    ///
890    /// If `xsave` is not large enough, `KVM_GET_XSAVE2` copies data beyond the allocated area,
891    /// possibly causing undefined behavior.
892    ///
893    /// See the documentation for dynamically enabled XSTATE features in the
894    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
895    ///
896    /// # Example
897    ///
898    /// ```rust
899    /// # extern crate kvm_ioctls;
900    /// # extern crate kvm_bindings;
901    /// # extern crate vmm_sys_util;
902    /// # use kvm_ioctls::{Kvm, Cap};
903    /// # use kvm_bindings::{Xsave, kvm_xsave, kvm_xsave2};
904    /// # use vmm_sys_util::fam::FamStruct;
905    /// let kvm = Kvm::new().unwrap();
906    /// let vm = kvm.create_vm().unwrap();
907    /// let vcpu = vm.create_vcpu(0).unwrap();
908    /// let xsave_size = vm.check_extension_int(Cap::Xsave2);
909    /// if xsave_size > 0 {
910    ///     let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
911    ///         .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
912    ///     let mut xsave = Xsave::new(fam_size).unwrap();
913    ///     unsafe { vcpu.get_xsave2(&mut xsave).unwrap() };
914    /// }
915    /// ```
916    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
917    pub unsafe fn get_xsave2(&self, xsave: &mut Xsave) -> Result<()> {
918        // SAFETY: Safe as long as `xsave` is allocated with enough space to hold the entire "xsave
919        // struct". That's why this function is unsafe.
920        let ret = unsafe {
921            ioctl_with_mut_ref(self, KVM_GET_XSAVE2(), &mut xsave.as_mut_fam_struct().xsave)
922        };
923        if ret != 0 {
924            return Err(errno::Error::last());
925        }
926        Ok(())
927    }
928
929    /// X86 specific call that sets the vcpu's current "xsave struct".
930    ///
931    /// See the documentation for `KVM_SET_XSAVE` in the
932    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
933    ///
934    /// # Arguments
935    ///
936    /// * `xsave` - xsave struct to be written.
937    ///
938    /// # Safety
939    ///
940    /// The C `kvm_xsave` struct was extended to have a flexible array member (FAM) at the end in
941    /// Linux 5.17. The size can vary depending on features that have been dynamically enabled via
942    /// `arch_prctl()` and the required size can be retrieved via
943    /// `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)`. That means `KVM_SET_XSAVE` may copy data beyond the
944    /// size of the traditional C `kvm_xsave` struct (i.e. 4096 bytes) now.
945    ///
946    /// It is safe if used on Linux prior to 5.17, if no XSTATE features are enabled dynamically or
947    /// if the required size is still within the traditional 4096 bytes even with dynamically
948    /// enabled features. However, if any features are enabled dynamically, it is recommended to use
949    /// `set_xsave2()` instead.
950    ///
951    /// See the documentation for dynamically enabled XSTATE features in the
952    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
953    ///
954    /// Theoretically, it can be made safe by checking which features are enabled in the bit vector
955    /// of the XSTATE header and validating the required size is less than or equal to 4096 bytes.
956    /// However, to do it properly, we would need to extract the XSTATE header from the `kvm_xsave`
957    /// struct, check enabled features, retrieve the required size for each enabled feature (like
958    /// `setup_xstate_cache()` do in Linux) and calculate the total size.
959    ///
960    /// # Example
961    ///
962    /// ```rust
963    /// # extern crate kvm_ioctls;
964    /// # use kvm_ioctls::Kvm;
965    /// let kvm = Kvm::new().unwrap();
966    /// let vm = kvm.create_vm().unwrap();
967    /// let vcpu = vm.create_vcpu(0).unwrap();
968    /// let xsave = Default::default();
969    /// // Your `xsave` manipulation here.
970    /// unsafe { vcpu.set_xsave(&xsave).unwrap() };
971    /// ```
972    #[cfg(target_arch = "x86_64")]
973    pub unsafe fn set_xsave(&self, xsave: &kvm_xsave) -> Result<()> {
974        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct.
975        let ret = unsafe { ioctl_with_ref(self, KVM_SET_XSAVE(), xsave) };
976        if ret != 0 {
977            return Err(errno::Error::last());
978        }
979        Ok(())
980    }
981
982    /// Convenience function for doing `KVM_SET_XSAVE` with the FAM-enabled [`Xsave`]
983    /// instead of the pre-5.17 plain [`kvm_xsave`].
984    ///
985    /// # Arguments
986    ///
987    /// * `xsave` - A reference to an [`Xsave`] instance to be set.
988    ///
989    /// # Safety
990    ///
991    /// This function is unsafe because there is no guarantee `xsave` is properly allocated with
992    /// the size that KVM assumes.
993    ///
994    /// The required size in bytes can be retrieved via `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` and
995    /// can vary depending on features that have been dynamically enabled by `arch_prctl()`. Thus,
996    /// any features must not be enabled after the required size has been confirmed.
997    ///
998    /// If `xsave` is not large enough, `KVM_SET_XSAVE` copies data beyond the allocated area to
999    /// the kernel, possibly causing undefined behavior.
1000    ///
1001    /// See the documentation for dynamically enabled XSTATE features in the
1002    /// [kernel doc](https://docs.kernel.org/arch/x86/xstate.html).
1003    ///
1004    /// # Example
1005    ///
1006    /// ```rust
1007    /// # extern crate kvm_ioctls;
1008    /// # extern crate kvm_bindings;
1009    /// # extern crate vmm_sys_util;
1010    /// # use kvm_ioctls::{Kvm, Cap};
1011    /// # use kvm_bindings::{Xsave, kvm_xsave, kvm_xsave2};
1012    /// # use vmm_sys_util::fam::FamStruct;
1013    /// let kvm = Kvm::new().unwrap();
1014    /// let vm = kvm.create_vm().unwrap();
1015    /// let vcpu = vm.create_vcpu(0).unwrap();
1016    /// let xsave_size = vm.check_extension_int(Cap::Xsave2);
1017    /// if xsave_size > 0 {
1018    ///     let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
1019    ///         .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
1020    ///     let xsave = Xsave::new(fam_size).unwrap();
1021    ///     // Your `xsave` manipulation here.
1022    ///     unsafe { vcpu.set_xsave2(&xsave).unwrap() };
1023    /// }
1024    /// ```
1025    #[cfg(target_arch = "x86_64")]
1026    pub unsafe fn set_xsave2(&self, xsave: &Xsave) -> Result<()> {
1027        // SAFETY: we trust the kernel and verified parameters
1028        unsafe { self.set_xsave(&xsave.as_fam_struct_ref().xsave) }
1029    }
1030
1031    /// X86 specific call that returns the vcpu's current "xcrs".
1032    ///
1033    /// See the documentation for `KVM_GET_XCRS` in the
1034    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1035    ///
1036    /// # Arguments
1037    ///
1038    /// * `kvm_xcrs` - xcrs to be read.
1039    ///
1040    /// # Example
1041    ///
1042    /// ```rust
1043    /// # extern crate kvm_ioctls;
1044    /// # use kvm_ioctls::Kvm;
1045    /// let kvm = Kvm::new().unwrap();
1046    /// let vm = kvm.create_vm().unwrap();
1047    /// let vcpu = vm.create_vcpu(0).unwrap();
1048    /// let xcrs = vcpu.get_xcrs().unwrap();
1049    /// ```
1050    #[cfg(target_arch = "x86_64")]
1051    pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1052        let mut xcrs = Default::default();
1053        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1054        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS(), &mut xcrs) };
1055        if ret != 0 {
1056            return Err(errno::Error::last());
1057        }
1058        Ok(xcrs)
1059    }
1060
1061    /// X86 specific call that sets the vcpu's current "xcrs".
1062    ///
1063    /// See the documentation for `KVM_SET_XCRS` in the
1064    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1065    ///
1066    /// # Arguments
1067    ///
1068    /// * `kvm_xcrs` - xcrs to be written.
1069    ///
1070    /// # Example
1071    ///
1072    /// ```rust
1073    /// # extern crate kvm_ioctls;
1074    /// # use kvm_ioctls::Kvm;
1075    /// let kvm = Kvm::new().unwrap();
1076    /// let vm = kvm.create_vm().unwrap();
1077    /// let vcpu = vm.create_vcpu(0).unwrap();
1078    /// let xcrs = Default::default();
1079    /// // Your `xcrs` manipulation here.
1080    /// vcpu.set_xcrs(&xcrs).unwrap();
1081    /// ```
1082    #[cfg(target_arch = "x86_64")]
1083    pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
1084        // SAFETY: Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1085        let ret = unsafe { ioctl_with_ref(self, KVM_SET_XCRS(), xcrs) };
1086        if ret != 0 {
1087            return Err(errno::Error::last());
1088        }
1089        Ok(())
1090    }
1091
1092    /// X86 specific call that returns the vcpu's current "debug registers".
1093    ///
1094    /// See the documentation for `KVM_GET_DEBUGREGS` in the
1095    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1096    ///
1097    /// # Arguments
1098    ///
1099    /// * `kvm_debugregs` - debug registers to be read.
1100    ///
1101    /// # Example
1102    ///
1103    /// ```rust
1104    /// # extern crate kvm_ioctls;
1105    /// # use kvm_ioctls::Kvm;
1106    /// let kvm = Kvm::new().unwrap();
1107    /// let vm = kvm.create_vm().unwrap();
1108    /// let vcpu = vm.create_vcpu(0).unwrap();
1109    /// let debug_regs = vcpu.get_debug_regs().unwrap();
1110    /// ```
1111    #[cfg(target_arch = "x86_64")]
1112    pub fn get_debug_regs(&self) -> Result<kvm_debugregs> {
1113        let mut debug_regs = Default::default();
1114        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
1115        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS(), &mut debug_regs) };
1116        if ret != 0 {
1117            return Err(errno::Error::last());
1118        }
1119        Ok(debug_regs)
1120    }
1121
1122    /// X86 specific call that sets the vcpu's current "debug registers".
1123    ///
1124    /// See the documentation for `KVM_SET_DEBUGREGS` in the
1125    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1126    ///
1127    /// # Arguments
1128    ///
1129    /// * `kvm_debugregs` - debug registers to be written.
1130    ///
1131    /// # Example
1132    ///
1133    /// ```rust
1134    /// # extern crate kvm_ioctls;
1135    /// # use kvm_ioctls::Kvm;
1136    /// let kvm = Kvm::new().unwrap();
1137    /// let vm = kvm.create_vm().unwrap();
1138    /// let vcpu = vm.create_vcpu(0).unwrap();
1139    /// let debug_regs = Default::default();
1140    /// // Your `debug_regs` manipulation here.
1141    /// vcpu.set_debug_regs(&debug_regs).unwrap();
1142    /// ```
1143    #[cfg(target_arch = "x86_64")]
1144    pub fn set_debug_regs(&self, debug_regs: &kvm_debugregs) -> Result<()> {
1145        // SAFETY: Here we trust the kernel not to read past the end of the kvm_debugregs struct.
1146        let ret = unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS(), debug_regs) };
1147        if ret != 0 {
1148            return Err(errno::Error::last());
1149        }
1150        Ok(())
1151    }
1152
1153    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1154    /// states of the vcpu.
1155    ///
1156    /// See the documentation for `KVM_GET_VCPU_EVENTS` in the
1157    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1158    ///
1159    /// # Arguments
1160    ///
1161    /// * `kvm_vcpu_events` - vcpu events to be read.
1162    ///
1163    /// # Example
1164    ///
1165    /// ```rust
1166    /// # extern crate kvm_ioctls;
1167    /// # use kvm_ioctls::{Kvm, Cap};
1168    /// let kvm = Kvm::new().unwrap();
1169    /// if kvm.check_extension(Cap::VcpuEvents) {
1170    ///     let vm = kvm.create_vm().unwrap();
1171    ///     let vcpu = vm.create_vcpu(0).unwrap();
1172    ///     let vcpu_events = vcpu.get_vcpu_events().unwrap();
1173    /// }
1174    /// ```
1175    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1176    pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1177        let mut vcpu_events = Default::default();
1178        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
1179        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS(), &mut vcpu_events) };
1180        if ret != 0 {
1181            return Err(errno::Error::last());
1182        }
1183        Ok(vcpu_events)
1184    }
1185
1186    /// Sets pending exceptions, interrupts, and NMIs as well as related states of the vcpu.
1187    ///
1188    /// See the documentation for `KVM_SET_VCPU_EVENTS` in the
1189    /// [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1190    ///
1191    /// # Arguments
1192    ///
1193    /// * `kvm_vcpu_events` - vcpu events to be written.
1194    ///
1195    /// # Example
1196    ///
1197    /// ```rust
1198    /// # extern crate kvm_ioctls;
1199    /// # use kvm_ioctls::{Kvm, Cap};
1200    /// let kvm = Kvm::new().unwrap();
1201    /// if kvm.check_extension(Cap::VcpuEvents) {
1202    ///     let vm = kvm.create_vm().unwrap();
1203    ///     let vcpu = vm.create_vcpu(0).unwrap();
1204    ///     let vcpu_events = Default::default();
1205    ///     // Your `vcpu_events` manipulation here.
1206    ///     vcpu.set_vcpu_events(&vcpu_events).unwrap();
1207    /// }
1208    /// ```
1209    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1210    pub fn set_vcpu_events(&self, vcpu_events: &kvm_vcpu_events) -> Result<()> {
1211        // SAFETY: Here we trust the kernel not to read past the end of the kvm_vcpu_events struct.
1212        let ret = unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS(), vcpu_events) };
1213        if ret != 0 {
1214            return Err(errno::Error::last());
1215        }
1216        Ok(())
1217    }
1218
1219    /// Sets the type of CPU to be exposed to the guest and optional features.
1220    ///
1221    /// This initializes an ARM vCPU to the specified type with the specified features
1222    /// and resets the values of all of its registers to defaults. See the documentation for
1223    /// `KVM_ARM_VCPU_INIT`.
1224    ///
1225    /// # Arguments
1226    ///
1227    /// * `kvi` - information about preferred CPU target type and recommended features for it.
1228    ///   For details check the `kvm_vcpu_init` structure in the
1229    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1230    ///
1231    /// # Example
1232    /// ```rust
1233    /// # extern crate kvm_ioctls;
1234    /// # extern crate kvm_bindings;
1235    /// # use kvm_ioctls::Kvm;
1236    /// use kvm_bindings::kvm_vcpu_init;
1237    /// let kvm = Kvm::new().unwrap();
1238    /// let vm = kvm.create_vm().unwrap();
1239    /// let vcpu = vm.create_vcpu(0).unwrap();
1240    ///
1241    /// let mut kvi = kvm_vcpu_init::default();
1242    /// vm.get_preferred_target(&mut kvi).unwrap();
1243    /// vcpu.vcpu_init(&kvi).unwrap();
1244    /// ```
1245    #[cfg(target_arch = "aarch64")]
1246    pub fn vcpu_init(&self, kvi: &kvm_vcpu_init) -> Result<()> {
1247        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1248        // exactly the size of the struct.
1249        let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT(), kvi) };
1250        if ret < 0 {
1251            return Err(errno::Error::last());
1252        }
1253        Ok(())
1254    }
1255
1256    /// Finalizes the configuration of the specified vcpu feature.
1257    ///
1258    /// The vcpu must already have been initialised, enabling the affected feature,
1259    /// by means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set
1260    /// in features[].
1261    ///
1262    /// For affected vcpu features, this is a mandatory step that must be performed before
1263    /// the vcpu is fully usable.
1264    ///
1265    /// Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be configured
1266    /// by use of ioctls such as KVM_SET_ONE_REG. The exact configuration that should be
1267    /// performaned and how to do it are feature-dependent.
1268    ///
1269    /// Other calls that depend on a particular feature being finalized, such as KVM_RUN,
1270    /// KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with -EPERM unless
1271    /// the feature has already been finalized by means of a KVM_ARM_VCPU_FINALIZE call.
1272    ///
1273    /// See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization using this ioctl.
1274    /// [KVM_ARM_VCPU_FINALIZE](https://www.kernel.org/doc/html/latest/virt/kvm/api.html#kvm-arm-vcpu-finalize).
1275    ///
1276    /// # Arguments
1277    ///
1278    /// * `feature` - vCPU features that needs to be finalized.
1279    ///
1280    /// # Example
1281    /// ```rust
1282    /// # extern crate kvm_ioctls;
1283    /// # extern crate kvm_bindings;
1284    /// # use kvm_ioctls::Kvm;
1285    /// use std::arch::is_aarch64_feature_detected;
1286    ///
1287    /// use kvm_bindings::{KVM_ARM_VCPU_SVE, kvm_vcpu_init};
1288    /// let kvm = Kvm::new().unwrap();
1289    /// let vm = kvm.create_vm().unwrap();
1290    /// let vcpu = vm.create_vcpu(0).unwrap();
1291    ///
1292    /// let mut kvi = kvm_vcpu_init::default();
1293    /// vm.get_preferred_target(&mut kvi).unwrap();
1294    /// kvi.features[0] |= 1 << KVM_ARM_VCPU_SVE;
1295    /// if is_aarch64_feature_detected!("sve2") || is_aarch64_feature_detected!("sve") {
1296    ///     vcpu.vcpu_init(&kvi).unwrap();
1297    ///     let feature = KVM_ARM_VCPU_SVE as i32;
1298    ///     vcpu.vcpu_finalize(&feature).unwrap();
1299    /// }
1300    /// ```
1301    #[cfg(target_arch = "aarch64")]
1302    pub fn vcpu_finalize(&self, feature: &std::os::raw::c_int) -> Result<()> {
1303        // SAFETY: This is safe because we know the kernel will only read this
1304        // parameter to select the correct finalization case in KVM.
1305        let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_FINALIZE(), feature) };
1306        if ret < 0 {
1307            return Err(errno::Error::last());
1308        }
1309        Ok(())
1310    }
1311
1312    /// Returns the guest registers that are supported for the
1313    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1314    ///
1315    /// # Arguments
1316    ///
1317    /// * `reg_list`  - list of registers (input/output). For details check the `kvm_reg_list`
1318    ///   structure in the
1319    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1320    ///
1321    /// # Example
1322    ///
1323    /// ```rust
1324    /// # extern crate kvm_ioctls;
1325    /// # extern crate kvm_bindings;
1326    /// # use kvm_ioctls::Kvm;
1327    /// # use kvm_bindings::RegList;
1328    /// let kvm = Kvm::new().unwrap();
1329    /// let vm = kvm.create_vm().unwrap();
1330    /// let vcpu = vm.create_vcpu(0).unwrap();
1331    ///
1332    /// // KVM_GET_REG_LIST on Aarch64 demands that the vcpus be initialized.
1333    /// # #[cfg(target_arch = "aarch64")]
1334    /// # {
1335    /// let mut kvi = kvm_bindings::kvm_vcpu_init::default();
1336    /// vm.get_preferred_target(&mut kvi).unwrap();
1337    /// vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
1338    ///
1339    /// let mut reg_list = RegList::new(500).unwrap();
1340    /// vcpu.get_reg_list(&mut reg_list).unwrap();
1341    /// assert!(reg_list.as_fam_struct_ref().n > 0);
1342    /// # }
1343    /// ```
1344    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1345    pub fn get_reg_list(&self, reg_list: &mut RegList) -> Result<()> {
1346        let ret =
1347            // SAFETY: This is safe because we allocated the struct and we trust the kernel will read
1348            // exactly the size of the struct.
1349            unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST(), reg_list.as_mut_fam_struct()) };
1350        if ret < 0 {
1351            return Err(errno::Error::last());
1352        }
1353        Ok(())
1354    }
1355
1356    /// Sets processor-specific debug registers and configures the vcpu for handling
1357    /// certain guest debug events using the `KVM_SET_GUEST_DEBUG` ioctl.
1358    ///
1359    /// # Arguments
1360    ///
1361    /// * `debug_struct` - control bitfields and debug registers, depending on the specific architecture.
1362    ///   For details check the `kvm_guest_debug` structure in the
1363    ///   [KVM API doc](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1364    ///
1365    /// # Example
1366    ///
1367    /// ```rust
1368    /// # extern crate kvm_ioctls;
1369    /// # extern crate kvm_bindings;
1370    /// # use kvm_ioctls::Kvm;
1371    /// # use kvm_bindings::{
1372    /// #     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_USE_SW_BP, kvm_guest_debug_arch, kvm_guest_debug
1373    /// # };
1374    /// let kvm = Kvm::new().unwrap();
1375    /// let vm = kvm.create_vm().unwrap();
1376    /// let vcpu = vm.create_vcpu(0).unwrap();
1377    ///
1378    /// let debug_struct = kvm_guest_debug {
1379    ///     // Configure the vcpu so that a KVM_DEBUG_EXIT would be generated
1380    ///     // when encountering a software breakpoint during execution
1381    ///     control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
1382    ///     pad: 0,
1383    ///     // Reset all arch-specific debug registers
1384    ///     arch: Default::default(),
1385    /// };
1386    ///
1387    /// vcpu.set_guest_debug(&debug_struct).unwrap();
1388    /// ```
1389    #[cfg(any(
1390        target_arch = "x86_64",
1391        target_arch = "aarch64",
1392        target_arch = "s390x",
1393        target_arch = "powerpc"
1394    ))]
1395    pub fn set_guest_debug(&self, debug_struct: &kvm_guest_debug) -> Result<()> {
1396        // SAFETY: Safe because we allocated the structure and we trust the kernel.
1397        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG(), debug_struct) };
1398        if ret < 0 {
1399            return Err(errno::Error::last());
1400        }
1401        Ok(())
1402    }
1403
1404    /// Sets the value of one register for this vCPU.
1405    ///
1406    /// The id of the register is encoded as specified in the kernel documentation
1407    /// for `KVM_SET_ONE_REG`.
1408    ///
1409    /// # Arguments
1410    ///
1411    /// * `reg_id` - ID of the register for which we are setting the value.
1412    /// * `data` - byte slice where the register value will be written to.
1413    ///
1414    /// # Note
1415    ///
1416    /// `data` should be equal or bigger then the register size
1417    /// oterwise function will return EINVAL error
1418    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1419    pub fn set_one_reg(&self, reg_id: u64, data: &[u8]) -> Result<usize> {
1420        let reg_size = reg_size(reg_id);
1421        if data.len() < reg_size {
1422            return Err(errno::Error::new(libc::EINVAL));
1423        }
1424        let onereg = kvm_one_reg {
1425            id: reg_id,
1426            addr: data.as_ptr() as u64,
1427        };
1428        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1429        // exactly the size of the struct.
1430        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG(), &onereg) };
1431        if ret < 0 {
1432            return Err(errno::Error::last());
1433        }
1434        Ok(reg_size)
1435    }
1436
1437    /// Writes the value of the specified vCPU register into provided buffer.
1438    ///
1439    /// The id of the register is encoded as specified in the kernel documentation
1440    /// for `KVM_GET_ONE_REG`.
1441    ///
1442    /// # Arguments
1443    ///
1444    /// * `reg_id` - ID of the register.
1445    /// * `data` - byte slice where the register value will be written to.
1446    /// # Note
1447    ///
1448    /// `data` should be equal or bigger then the register size
1449    /// oterwise function will return EINVAL error
1450    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1451    pub fn get_one_reg(&self, reg_id: u64, data: &mut [u8]) -> Result<usize> {
1452        let reg_size = reg_size(reg_id);
1453        if data.len() < reg_size {
1454            return Err(errno::Error::new(libc::EINVAL));
1455        }
1456        let mut onereg = kvm_one_reg {
1457            id: reg_id,
1458            addr: data.as_ptr() as u64,
1459        };
1460        // SAFETY: This is safe because we allocated the struct and we know the kernel will read
1461        // exactly the size of the struct.
1462        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_ONE_REG(), &mut onereg) };
1463        if ret < 0 {
1464            return Err(errno::Error::last());
1465        }
1466        Ok(reg_size)
1467    }
1468
1469    /// Notify the guest about the vCPU being paused.
1470    ///
1471    /// See the documentation for `KVM_KVMCLOCK_CTRL` in the
1472    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1473    #[cfg(target_arch = "x86_64")]
1474    pub fn kvmclock_ctrl(&self) -> Result<()> {
1475        // SAFETY: Safe because we know that our file is a KVM fd and that the request
1476        // is one of the ones defined by kernel.
1477        let ret = unsafe { ioctl(self, KVM_KVMCLOCK_CTRL()) };
1478        if ret != 0 {
1479            return Err(errno::Error::last());
1480        }
1481        Ok(())
1482    }
1483
1484    /// Triggers the running of the current virtual CPU returning an exit reason.
1485    ///
1486    /// See documentation for `KVM_RUN`.
1487    ///
1488    /// # Example
1489    ///
1490    /// Running some dummy code on x86_64 that immediately halts the vCPU. Based on
1491    /// [https://lwn.net/Articles/658511/](https://lwn.net/Articles/658511/).
1492    ///
1493    /// ```rust
1494    /// # extern crate kvm_ioctls;
1495    /// # extern crate kvm_bindings;
1496    /// # use std::io::Write;
1497    /// # use std::ptr::null_mut;
1498    /// # use std::slice;
1499    /// # use kvm_ioctls::{Kvm, VcpuExit};
1500    /// # use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES};
1501    /// # let kvm = Kvm::new().unwrap();
1502    /// # let vm = kvm.create_vm().unwrap();
1503    ///
1504    /// # #[cfg(target_arch = "x86_64")]
1505    /// # {
1506    /// let mem_size = 0x4000;
1507    /// let guest_addr: u64 = 0x1000;
1508    /// let load_addr: *mut u8 = unsafe {
1509    ///     libc::mmap(
1510    ///         null_mut(),
1511    ///         mem_size,
1512    ///         libc::PROT_READ | libc::PROT_WRITE,
1513    ///         libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
1514    ///         -1,
1515    ///         0,
1516    ///     ) as *mut u8
1517    /// };
1518    ///
1519    /// let mem_region = kvm_userspace_memory_region {
1520    ///     slot: 0,
1521    ///     guest_phys_addr: guest_addr,
1522    ///     memory_size: mem_size as u64,
1523    ///     userspace_addr: load_addr as u64,
1524    ///     flags: 0,
1525    /// };
1526    /// unsafe { vm.set_user_memory_region(mem_region).unwrap() };
1527    ///
1528    /// // Dummy x86 code that just calls halt.
1529    /// let x86_code = [0xf4 /* hlt */];
1530    ///
1531    /// // Write the code in the guest memory. This will generate a dirty page.
1532    /// unsafe {
1533    ///     let mut slice = slice::from_raw_parts_mut(load_addr, mem_size);
1534    ///     slice.write(&x86_code).unwrap();
1535    /// }
1536    ///
1537    /// let mut vcpu_fd = vm.create_vcpu(0).unwrap();
1538    ///
1539    /// let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
1540    /// vcpu_sregs.cs.base = 0;
1541    /// vcpu_sregs.cs.selector = 0;
1542    /// vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
1543    ///
1544    /// let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
1545    /// // Set the Instruction Pointer to the guest address where we loaded the code.
1546    /// vcpu_regs.rip = guest_addr;
1547    /// vcpu_regs.rax = 2;
1548    /// vcpu_regs.rbx = 3;
1549    /// vcpu_regs.rflags = 2;
1550    /// vcpu_fd.set_regs(&vcpu_regs).unwrap();
1551    ///
1552    /// loop {
1553    ///     match vcpu_fd.run().expect("run failed") {
1554    ///         VcpuExit::Hlt => {
1555    ///             break;
1556    ///         }
1557    ///         exit_reason => panic!("unexpected exit reason: {:?}", exit_reason),
1558    ///     }
1559    /// }
1560    /// # }
1561    /// ```
    pub fn run(&mut self) -> Result<VcpuExit<'_>> {
        // SAFETY: Safe because we know that our file is a vCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN()) };
        if ret == 0 {
            // A zero return means the kernel populated `kvm_run.exit_reason`; decode it.
            let run = self.kvm_run_ptr.as_mut_ref();
            match run.exit_reason {
                // make sure you treat all possible exit reasons from include/uapi/linux/kvm.h
                // correspondingly when upgrading to a different kernel version
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_IO => {
                    let run_start = run as *mut kvm_run as *mut u8;
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    // Total payload: `count` accesses of `size` bytes each.
                    let data_size = io.count as usize * io.size as usize;
                    // SAFETY: The data_offset is defined by the kernel to be some number of bytes
                    // into the kvm_run structure, which we have fully mmap'd.
                    let data_ptr = unsafe { run_start.offset(io.data_offset as isize) };
                    let data_slice =
                        // SAFETY: The slice's lifetime is limited to the lifetime of this vCPU, which is equal
                        // to the mmap of the `kvm_run` struct that this is slicing from.
                        unsafe { std::slice::from_raw_parts_mut::<u8>(data_ptr, data_size) };
                    match u32::from(io.direction) {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn(port, data_slice)),
                        KVM_EXIT_IO_OUT => Ok(VcpuExit::IoOut(port, data_slice)),
                        // Any other direction value is unexpected coming from the kernel.
                        _ => Err(errno::Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_HYPERCALL => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let hypercall = unsafe { &mut run.__bindgen_anon_1.hypercall };
                    Ok(VcpuExit::Hypercall(HypercallExit {
                        nr: hypercall.nr,
                        args: hypercall.args,
                        // `ret` is a mutable borrow so the caller can pass a return code
                        // back to the guest on the next KVM_RUN.
                        ret: &mut hypercall.ret,
                        // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                        // which union field to use.
                        longmode: unsafe { hypercall.__bindgen_anon_1.longmode },
                    }))
                }
                KVM_EXIT_DEBUG => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let debug = unsafe { run.__bindgen_anon_1.debug };
                    Ok(VcpuExit::Debug(debug.arch))
                }
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_MMIO => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
                    let addr = mmio.phys_addr;
                    let len = mmio.len as usize;
                    let data_slice = &mut mmio.data[..len];
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite(addr, data_slice))
                    } else {
                        Ok(VcpuExit::MmioRead(addr, data_slice))
                    }
                }
                KVM_EXIT_X86_RDMSR => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
                    let exit = ReadMsrExit {
                        error: &mut msr.error,
                        reason: MsrExitReason::from_bits_truncate(msr.reason),
                        index: msr.index,
                        data: &mut msr.data,
                    };
                    Ok(VcpuExit::X86Rdmsr(exit))
                }
                KVM_EXIT_X86_WRMSR => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let msr = unsafe { &mut run.__bindgen_anon_1.msr };
                    let exit = WriteMsrExit {
                        error: &mut msr.error,
                        reason: MsrExitReason::from_bits_truncate(msr.reason),
                        index: msr.index,
                        data: msr.data,
                    };
                    Ok(VcpuExit::X86Wrmsr(exit))
                }
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let fail_entry = unsafe { &mut run.__bindgen_anon_1.fail_entry };
                    Ok(VcpuExit::FailEntry(
                        fail_entry.hardware_entry_failure_reason,
                        fail_entry.cpu,
                    ))
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let system_event = unsafe { &mut run.__bindgen_anon_1.system_event };
                    let ndata = system_event.ndata;
                    // SAFETY: Safe because we only populate with valid data (based on ndata)
                    let data = unsafe { &system_event.__bindgen_anon_1.data[0..ndata as usize] };
                    Ok(VcpuExit::SystemEvent(system_event.type_, data))
                }
                KVM_EXIT_S390_STSI => Ok(VcpuExit::S390Stsi),
                KVM_EXIT_IOAPIC_EOI => {
                    // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                    // which union field to use.
                    let eoi = unsafe { &mut run.__bindgen_anon_1.eoi };
                    Ok(VcpuExit::IoapicEoi(eoi.vector))
                }
                KVM_EXIT_HYPERV => Ok(VcpuExit::Hyperv),
                // Exit reasons we do not (yet) model are surfaced verbatim to the caller.
                r => Ok(VcpuExit::Unsupported(r)),
            }
        } else {
            let errno = errno::Error::last();
            let run = self.kvm_run_ptr.as_mut_ref();
            // From https://docs.kernel.org/virt/kvm/api.html#kvm-run :
            //
            // KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it accompanies
            // a return code of ‘-1’, not ‘0’! errno will always be set to EFAULT or EHWPOISON
            // when KVM exits with KVM_EXIT_MEMORY_FAULT, userspace should assume kvm_run.exit_reason
            // is stale/undefined for all other error numbers.
            if ret == -1
                && (errno == errno::Error::new(libc::EFAULT)
                    || errno == errno::Error::new(libc::EHWPOISON))
                && run.exit_reason == KVM_EXIT_MEMORY_FAULT
            {
                // SAFETY: Safe because the exit_reason (which comes from the kernel) told us
                // which union field to use.
                let fault = unsafe { &mut run.__bindgen_anon_1.memory_fault };
                Ok(VcpuExit::MemoryFault {
                    flags: fault.flags,
                    gpa: fault.gpa,
                    size: fault.size,
                })
            } else {
                Err(errno)
            }
        }
    }
1720
1721    /// Returns a mutable reference to the kvm_run structure
1722    pub fn get_kvm_run(&mut self) -> &mut kvm_run {
1723        self.kvm_run_ptr.as_mut_ref()
1724    }
1725
1726    /// Sets the `immediate_exit` flag on the `kvm_run` struct associated with this vCPU to `val`.
1727    pub fn set_kvm_immediate_exit(&mut self, val: u8) {
1728        let kvm_run = self.kvm_run_ptr.as_mut_ref();
1729        kvm_run.immediate_exit = val;
1730    }
1731
1732    /// Returns the vCPU TSC frequency in KHz or an error if the host has unstable TSC.
1733    ///
1734    /// # Example
1735    ///
1736    ///  ```rust
1737    /// # extern crate kvm_ioctls;
1738    /// # use kvm_ioctls::Kvm;
1739    /// let kvm = Kvm::new().unwrap();
1740    /// let vm = kvm.create_vm().unwrap();
1741    /// let vcpu = vm.create_vcpu(0).unwrap();
1742    /// let tsc_khz = vcpu.get_tsc_khz().unwrap();
1743    /// ```
1744    ///
1745    #[cfg(target_arch = "x86_64")]
1746    pub fn get_tsc_khz(&self) -> Result<u32> {
1747        // SAFETY:  Safe because we know that our file is a KVM fd and that the request is one of
1748        // the ones defined by kernel.
1749        let ret = unsafe { ioctl(self, KVM_GET_TSC_KHZ()) };
1750        if ret >= 0 {
1751            Ok(ret as u32)
1752        } else {
1753            Err(errno::Error::new(ret))
1754        }
1755    }
1756
1757    /// Sets the specified vCPU TSC frequency.
1758    ///
1759    /// # Arguments
1760    ///
1761    /// * `freq` - The frequency unit is KHz as per the KVM API documentation
1762    ///   for `KVM_SET_TSC_KHZ`.
1763    ///
1764    /// # Example
1765    ///
1766    ///  ```rust
1767    /// # extern crate kvm_ioctls;
1768    /// # use kvm_ioctls::{Cap, Kvm};
1769    /// let kvm = Kvm::new().unwrap();
1770    /// let vm = kvm.create_vm().unwrap();
1771    /// let vcpu = vm.create_vcpu(0).unwrap();
1772    /// if kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl) {
1773    ///     vcpu.set_tsc_khz(1000).unwrap();
1774    /// }
1775    /// ```
1776    ///
1777    #[cfg(target_arch = "x86_64")]
1778    pub fn set_tsc_khz(&self, freq: u32) -> Result<()> {
1779        // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of
1780        // the ones defined by kernel.
1781        let ret = unsafe { ioctl_with_val(self, KVM_SET_TSC_KHZ(), freq as u64) };
1782        if ret < 0 {
1783            Err(errno::Error::last())
1784        } else {
1785            Ok(())
1786        }
1787    }
1788
1789    /// Translates a virtual address according to the vCPU's current address translation mode.
1790    ///
1791    /// The physical address is returned in a `kvm_translation` structure as defined in the
1792    /// [KVM API documentation](https://www.kernel.org/doc/Documentation/virtual/kvm/api.txt).
1793    /// See documentation for `KVM_TRANSLATE`.
1794    ///
1795    /// # Arguments
1796    ///
1797    /// * `gva` - The virtual address to translate.
1798    ///
1799    /// # Example
1800    ///
1801    /// ```rust
1802    /// # extern crate kvm_ioctls;
1803    /// # use kvm_ioctls::Kvm;
1804    /// let kvm = Kvm::new().unwrap();
1805    /// let vm = kvm.create_vm().unwrap();
1806    /// let vcpu = vm.create_vcpu(0).unwrap();
1807    /// #[cfg(target_arch = "x86_64")]
1808    /// let tr = vcpu.translate_gva(0x10000).unwrap();
1809    /// ```
1810    #[cfg(target_arch = "x86_64")]
1811    pub fn translate_gva(&self, gva: u64) -> Result<kvm_translation> {
1812        let mut tr = kvm_translation {
1813            linear_address: gva,
1814            ..Default::default()
1815        };
1816
1817        // SAFETY: Safe because we know that our file is a vCPU fd, we know the kernel will only
1818        // write the correct amount of memory to our pointer, and we verify the return result.
1819        let ret = unsafe { ioctl_with_mut_ref(self, KVM_TRANSLATE(), &mut tr) };
1820        if ret != 0 {
1821            return Err(errno::Error::last());
1822        }
1823        Ok(tr)
1824    }
1825
1826    /// Enable the given [`SyncReg`] to be copied to userspace on the next exit
1827    ///
1828    /// # Arguments
1829    ///
1830    /// * `reg` - The [`SyncReg`] to copy out of the guest
1831    ///
1832    /// # Example
1833    ///
1834    ///  ```rust
1835    /// # extern crate kvm_ioctls;
1836    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1837    /// let kvm = Kvm::new().unwrap();
1838    /// let vm = kvm.create_vm().unwrap();
1839    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1840    /// vcpu.set_sync_valid_reg(SyncReg::Register);
1841    /// vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
1842    /// vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
1843    /// ```
1844    #[cfg(target_arch = "x86_64")]
1845    pub fn set_sync_valid_reg(&mut self, reg: SyncReg) {
1846        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1847        kvm_run.kvm_valid_regs |= reg as u64;
1848    }
1849
1850    /// Tell KVM to copy the given [`SyncReg`] into the guest on the next entry
1851    ///
1852    /// # Arguments
1853    ///
1854    /// * `reg` - The [`SyncReg`] to copy into the guest
1855    ///
1856    /// # Example
1857    ///
1858    ///  ```rust
1859    /// # extern crate kvm_ioctls;
1860    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1861    /// let kvm = Kvm::new().unwrap();
1862    /// let vm = kvm.create_vm().unwrap();
1863    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1864    /// vcpu.set_sync_dirty_reg(SyncReg::Register);
1865    /// ```
1866    #[cfg(target_arch = "x86_64")]
1867    pub fn set_sync_dirty_reg(&mut self, reg: SyncReg) {
1868        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1869        kvm_run.kvm_dirty_regs |= reg as u64;
1870    }
1871
1872    /// Disable the given [`SyncReg`] to be copied to userspace on the next exit
1873    ///
1874    /// # Arguments
1875    ///
1876    /// * `reg` - The [`SyncReg`] to not copy out of the guest
1877    ///
1878    /// # Example
1879    ///
1880    ///  ```rust
1881    /// # extern crate kvm_ioctls;
1882    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1883    /// let kvm = Kvm::new().unwrap();
1884    /// let vm = kvm.create_vm().unwrap();
1885    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1886    /// vcpu.clear_sync_valid_reg(SyncReg::Register);
1887    /// ```
1888    #[cfg(target_arch = "x86_64")]
1889    pub fn clear_sync_valid_reg(&mut self, reg: SyncReg) {
1890        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1891        kvm_run.kvm_valid_regs &= !(reg as u64);
1892    }
1893
1894    /// Tell KVM to not copy the given [`SyncReg`] into the guest on the next entry
1895    ///
1896    /// # Arguments
1897    ///
1898    /// * `reg` - The [`SyncReg`] to not copy out into the guest
1899    ///
1900    /// # Example
1901    ///
1902    ///  ```rust
1903    /// # extern crate kvm_ioctls;
1904    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1905    /// let kvm = Kvm::new().unwrap();
1906    /// let vm = kvm.create_vm().unwrap();
1907    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1908    /// vcpu.clear_sync_dirty_reg(SyncReg::Register);
1909    /// ```
1910    #[cfg(target_arch = "x86_64")]
1911    pub fn clear_sync_dirty_reg(&mut self, reg: SyncReg) {
1912        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1913        kvm_run.kvm_dirty_regs &= !(reg as u64);
1914    }
1915
1916    /// Get the [`kvm_sync_regs`] from the VM
1917    ///
1918    /// # Example
1919    ///
1920    ///  ```rust
1921    /// # extern crate kvm_ioctls;
1922    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1923    /// let kvm = Kvm::new().unwrap();
1924    /// let vm = kvm.create_vm().unwrap();
1925    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1926    /// if kvm.check_extension(Cap::SyncRegs) {
1927    ///     vcpu.set_sync_valid_reg(SyncReg::Register);
1928    ///     vcpu.run();
1929    ///     let guest_rax = vcpu.sync_regs().regs.rax;
1930    /// }
1931    /// ```
1932    #[cfg(target_arch = "x86_64")]
1933    pub fn sync_regs(&self) -> kvm_sync_regs {
1934        let kvm_run = self.kvm_run_ptr.as_ref();
1935
1936        // SAFETY: Accessing this union field could be out of bounds if the `kvm_run`
1937        // allocation isn't large enough. The `kvm_run` region is set using
1938        // `get_vcpu_map_size`, so this region is in bounds
1939        unsafe { kvm_run.s.regs }
1940    }
1941
1942    /// Get a mutable reference to the [`kvm_sync_regs`] from the VM
1943    ///
1944    /// # Example
1945    ///
1946    ///  ```rust
1947    /// # extern crate kvm_ioctls;
1948    /// # use kvm_ioctls::{Kvm, SyncReg, Cap};
1949    /// let kvm = Kvm::new().unwrap();
1950    /// let vm = kvm.create_vm().unwrap();
1951    /// let mut vcpu = vm.create_vcpu(0).unwrap();
1952    /// if kvm.check_extension(Cap::SyncRegs) {
1953    ///     vcpu.set_sync_valid_reg(SyncReg::Register);
1954    ///     vcpu.run();
1955    ///     // Set the guest RAX to 0xdeadbeef
1956    ///     vcpu.sync_regs_mut().regs.rax = 0xdeadbeef;
1957    ///     vcpu.set_sync_dirty_reg(SyncReg::Register);
1958    ///     vcpu.run();
1959    /// }
1960    /// ```
1961    #[cfg(target_arch = "x86_64")]
1962    pub fn sync_regs_mut(&mut self) -> &mut kvm_sync_regs {
1963        let kvm_run: &mut kvm_run = self.kvm_run_ptr.as_mut_ref();
1964
1965        // SAFETY: Accessing this union field could be out of bounds if the `kvm_run`
1966        // allocation isn't large enough. The `kvm_run` region is set using
1967        // `get_vcpu_map_size`, so this region is in bounds
1968        unsafe { &mut kvm_run.s.regs }
1969    }
1970
1971    /// Triggers an SMI on the virtual CPU.
1972    ///
1973    /// See documentation for `KVM_SMI`.
1974    ///
1975    /// ```rust
1976    /// # use kvm_ioctls::{Kvm, Cap};
1977    /// let kvm = Kvm::new().unwrap();
1978    /// let vm = kvm.create_vm().unwrap();
1979    /// let vcpu = vm.create_vcpu(0).unwrap();
1980    /// if kvm.check_extension(Cap::X86Smm) {
1981    ///     vcpu.smi().unwrap();
1982    /// }
1983    /// ```
1984    #[cfg(target_arch = "x86_64")]
1985    pub fn smi(&self) -> Result<()> {
1986        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
1987        let ret = unsafe { ioctl(self, KVM_SMI()) };
1988        match ret {
1989            0 => Ok(()),
1990            _ => Err(errno::Error::last()),
1991        }
1992    }
1993
1994    /// Returns the nested guest state using the `KVM_GET_NESTED_STATE` ioctl.
1995    ///
1996    /// This only works when `KVM_CAP_NESTED_STATE` is available.
1997    ///
1998    /// # Arguments
1999    ///
2000    /// - `buffer`: The buffer to be filled with the new nested state.
2001    ///
2002    /// # Return Value
2003    /// If this returns `None`, KVM doesn't have nested state. Otherwise, the
2004    /// actual length of the state is returned.
2005    ///
2006    /// # Example
2007    ///
2008    /// ```rust
2009    /// # use kvm_ioctls::{Kvm, Cap, KvmNestedStateBuffer};
2010    /// let kvm = Kvm::new().unwrap();
2011    /// let vm = kvm.create_vm().unwrap();
2012    /// let vcpu = vm.create_vcpu(0).unwrap();
2013    /// let mut state_buffer = KvmNestedStateBuffer::empty();
2014    /// if kvm.check_extension(Cap::NestedState) {
2015    ///     vcpu.nested_state(&mut state_buffer).unwrap();
2016    ///     // Next, serialize the actual state into a file or so.
2017    /// }
2018    /// ```
2019    ///
2020    /// [`Kvm::check_extension_int`]: kvm_ioctls::Kvm::check_extension_int
2021    #[cfg(target_arch = "x86_64")]
2022    pub fn nested_state(
2023        &self,
2024        buffer: &mut KvmNestedStateBuffer,
2025    ) -> Result<Option<NonZeroUsize /* actual length of state */>> {
2026        assert_ne!(buffer.size, 0, "buffer should not report a size of zero");
2027
2028        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
2029        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_NESTED_STATE(), buffer) };
2030        match ret {
2031            0 => {
2032                let size = buffer.size as usize;
2033                if size == size_of::<kvm_nested_state /* just the empty header */>() {
2034                    Ok(None)
2035                } else {
2036                    Ok(Some(NonZeroUsize::new(size).unwrap()))
2037                }
2038            }
2039            _ => Err(errno::Error::last()),
2040        }
2041    }
2042
2043    /// Sets the nested guest state using the `KVM_SET_NESTED_STATE` ioctl.
2044    ///
2045    /// This only works when  `KVM_CAP_NESTED_STATE` is available.
2046    ///
2047    /// # Arguments
2048    ///
2049    /// - `state`: The new state to be put into KVM. The header must report the
2050    ///   `size` of the state properly. The state must be retrieved first using
2051    ///   [`Self::nested_state`].
2052    ///
2053    /// # Example
2054    ///
2055    /// ```rust
2056    /// # use kvm_ioctls::{Kvm, Cap, KvmNestedStateBuffer};
2057    /// let kvm = Kvm::new().unwrap();
2058    /// let vm = kvm.create_vm().unwrap();
2059    /// let vcpu = vm.create_vcpu(0).unwrap();
2060    /// if kvm.check_extension(Cap::NestedState) {
2061    ///     let mut state_buffer = KvmNestedStateBuffer::empty();
2062    ///     vcpu.nested_state(&mut state_buffer).unwrap();
2063    ///     // Rename the variable to better reflect the role.
2064    ///     let old_state = state_buffer;
2065    ///
2066    ///     // now assume we transfer the state to a new location
2067    ///     // and load it back into kvm:
2068    ///     vcpu.set_nested_state(&old_state).unwrap();
2069    /// }
2070    /// ```
2071    #[cfg(target_arch = "x86_64")]
2072    pub fn set_nested_state(&self, state: &KvmNestedStateBuffer) -> Result<()> {
2073        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
2074        let ret = unsafe { ioctl_with_ref(self, KVM_SET_NESTED_STATE(), state) };
2075        match ret {
2076            0 => Ok(()),
2077            _ => Err(errno::Error::last()),
2078        }
2079    }
2080
2081    /// Queues an NMI on the thread's vcpu. Only usable if `KVM_CAP_USER_NMI`
2082    /// is available.
2083    ///
2084    /// See the documentation for `KVM_NMI`.
2085    ///
2086    /// # Example
2087    ///
2088    /// ```rust
2089    /// # use kvm_ioctls::{Kvm, Cap};
2090    /// let kvm = Kvm::new().unwrap();
2091    /// let vm = kvm.create_vm().unwrap();
2092    /// let vcpu = vm.create_vcpu(0).unwrap();
2093    /// if kvm.check_extension(Cap::UserNmi) {
2094    ///     vcpu.nmi().unwrap();
2095    /// }
2096    /// ```
2097    #[cfg(target_arch = "x86_64")]
2098    pub fn nmi(&self) -> Result<()> {
2099        // SAFETY: Safe because we call this with a Vcpu fd and we trust the kernel.
2100        let ret = unsafe { ioctl(self, KVM_NMI()) };
2101        match ret {
2102            0 => Ok(()),
2103            _ => Err(errno::Error::last()),
2104        }
2105    }
2106
2107    /// Maps the coalesced MMIO ring page. This allows reading entries from
2108    /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read).
2109    ///
2110    /// # Returns
2111    ///
2112    /// Returns an error if the buffer could not be mapped, usually because
2113    /// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
2114    /// is not available.
2115    ///
2116    /// # Examples
2117    ///
2118    /// ```rust
2119    /// # use kvm_ioctls::{Kvm, Cap};
2120    /// let kvm = Kvm::new().unwrap();
2121    /// let vm = kvm.create_vm().unwrap();
2122    /// let mut vcpu = vm.create_vcpu(0).unwrap();
2123    /// if kvm.check_extension(Cap::CoalescedMmio) {
2124    ///     vcpu.map_coalesced_mmio_ring().unwrap();
2125    /// }
2126    /// ```
2127    pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> {
2128        if self.coalesced_mmio_ring.is_none() {
2129            let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?;
2130            self.coalesced_mmio_ring = Some(ring);
2131        }
2132        Ok(())
2133    }
2134
2135    /// Read a single entry from the coalesced MMIO ring.
2136    /// For entries to be appended to the ring by the kernel, addresses must be registered
2137    /// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()).
2138    ///
2139    /// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand.
2140    ///
2141    /// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`.
2142    ///
2143    /// # Returns
2144    ///
2145    /// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring)
2146    ///   was not called beforehand.
2147    /// * [`Ok<None>`] if the ring is empty.
2148    /// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read.
2149    pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> {
2150        self.coalesced_mmio_ring
2151            .as_mut()
2152            .ok_or(errno::Error::new(libc::EIO))
2153            .map(|ring| ring.read_entry())
2154    }
2155}
2156
2157/// Helper function to create a new `VcpuFd`.
2158///
2159/// This should not be exported as a public function because the preferred way is to use
2160/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
2161/// then it would be exported with the public `VcpuFd` interface.
2162pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
2163    VcpuFd {
2164        vcpu,
2165        kvm_run_ptr,
2166        coalesced_mmio_ring: None,
2167    }
2168}
2169
// Expose the underlying vCPU file descriptor, e.g. for use in raw ioctl calls.
impl AsRawFd for VcpuFd {
    fn as_raw_fd(&self) -> RawFd {
        self.vcpu.as_raw_fd()
    }
}
2175
2176#[cfg(test)]
2177mod tests {
2178    #![allow(clippy::undocumented_unsafe_blocks)]
2179    extern crate byteorder;
2180
2181    use super::*;
2182    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
2183    use crate::cap::Cap;
2184    use crate::ioctls::system::Kvm;
2185    use std::ptr::NonNull;
2186
2187    // Helper function for memory mapping `size` bytes of anonymous memory.
2188    // Panics if the mmap fails.
2189    fn mmap_anonymous(size: usize) -> NonNull<u8> {
2190        use std::ptr::null_mut;
2191
2192        let addr = unsafe {
2193            libc::mmap(
2194                null_mut(),
2195                size,
2196                libc::PROT_READ | libc::PROT_WRITE,
2197                libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE,
2198                -1,
2199                0,
2200            )
2201        };
2202        if addr == libc::MAP_FAILED {
2203            panic!("mmap failed.");
2204        }
2205
2206        NonNull::new(addr).unwrap().cast()
2207    }
2208
2209    #[test]
2210    fn test_create_vcpu() {
2211        let kvm = Kvm::new().unwrap();
2212        let vm = kvm.create_vm().unwrap();
2213
2214        vm.create_vcpu(0).unwrap();
2215    }
2216
2217    #[cfg(target_arch = "x86_64")]
2218    #[test]
2219    fn test_get_cpuid() {
2220        let kvm = Kvm::new().unwrap();
2221        if kvm.check_extension(Cap::ExtCpuid) {
2222            let vm = kvm.create_vm().unwrap();
2223            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
2224            let ncpuids = cpuid.as_slice().len();
2225            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
2226            let nr_vcpus = kvm.get_nr_vcpus();
2227            for cpu_idx in 0..nr_vcpus {
2228                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
2229                vcpu.set_cpuid2(&cpuid).unwrap();
2230                let retrieved_cpuid = vcpu.get_cpuid2(ncpuids).unwrap();
2231                // Only check the first few leafs as some (e.g. 13) are reserved.
2232                assert_eq!(cpuid.as_slice()[..3], retrieved_cpuid.as_slice()[..3]);
2233            }
2234        }
2235    }
2236
2237    #[cfg(target_arch = "x86_64")]
2238    #[test]
2239    fn test_get_cpuid_fail_num_entries_too_high() {
2240        let kvm = Kvm::new().unwrap();
2241        if kvm.check_extension(Cap::ExtCpuid) {
2242            let vm = kvm.create_vm().unwrap();
2243            let vcpu = vm.create_vcpu(0).unwrap();
2244            let err_cpuid = vcpu.get_cpuid2(KVM_MAX_CPUID_ENTRIES + 1_usize).err();
2245            assert_eq!(err_cpuid.unwrap().errno(), libc::ENOMEM);
2246        }
2247    }
2248
2249    #[cfg(target_arch = "x86_64")]
2250    #[test]
2251    fn test_get_cpuid_fail_num_entries_too_small() {
2252        let kvm = Kvm::new().unwrap();
2253        if kvm.check_extension(Cap::ExtCpuid) {
2254            let vm = kvm.create_vm().unwrap();
2255            let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
2256            let ncpuids = cpuid.as_slice().len();
2257            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
2258            let nr_vcpus = kvm.get_nr_vcpus();
2259            for cpu_idx in 0..nr_vcpus {
2260                let vcpu = vm.create_vcpu(cpu_idx as u64).unwrap();
2261                vcpu.set_cpuid2(&cpuid).unwrap();
2262                let err = vcpu.get_cpuid2(ncpuids - 1_usize).err();
2263                assert_eq!(err.unwrap().errno(), libc::E2BIG);
2264            }
2265        }
2266    }
2267
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_set_cpuid() {
        let kvm = Kvm::new().unwrap();
        if kvm.check_extension(Cap::ExtCpuid) {
            let vm = kvm.create_vm().unwrap();
            let mut cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
            let ncpuids = cpuid.as_slice().len();
            assert!(ncpuids <= KVM_MAX_CPUID_ENTRIES);
            let vcpu = vm.create_vcpu(0).unwrap();

            // Setting Manufacturer ID
            {
                let entries = cpuid.as_mut_slice();
                for entry in entries.iter_mut() {
                    if entry.function == 0 {
                        // " KVMKVMKVM " — ebx/ecx/edx of leaf 0 spell the vendor string.
                        entry.ebx = 0x4b4d564b;
                        entry.ecx = 0x564b4d56;
                        entry.edx = 0x4d;
                    }
                }
            }
            vcpu.set_cpuid2(&cpuid).unwrap();
            // Read the CPUID back and verify leaf 0 kept the registers written above.
            let cpuid_0 = vcpu.get_cpuid2(ncpuids).unwrap();
            for entry in cpuid_0.as_slice() {
                if entry.function == 0 {
                    assert_eq!(entry.ebx, 0x4b4d564b);
                    assert_eq!(entry.ecx, 0x564b4d56);
                    assert_eq!(entry.edx, 0x4d);
                }
            }

            // Disabling Intel SHA extensions.
            // SHA support is advertised in leaf 7 (subleaf 0), EBX bit 29.
            const EBX_SHA_SHIFT: u32 = 29;
            let mut ebx_sha_off = 0u32;
            {
                let entries = cpuid.as_mut_slice();
                for entry in entries.iter_mut() {
                    if entry.function == 7 && entry.ecx == 0 {
                        entry.ebx &= !(1 << EBX_SHA_SHIFT);
                        ebx_sha_off = entry.ebx;
                    }
                }
            }
            vcpu.set_cpuid2(&cpuid).unwrap();
            // Confirm the cleared SHA bit survived the set/get round trip.
            let cpuid_1 = vcpu.get_cpuid2(ncpuids).unwrap();
            for entry in cpuid_1.as_slice() {
                if entry.function == 7 && entry.ecx == 0 {
                    assert_eq!(entry.ebx, ebx_sha_off);
                }
            }
        }
    }
2322
2323    #[cfg(target_arch = "x86_64")]
2324    #[allow(non_snake_case)]
2325    #[test]
2326    fn test_fpu() {
2327        // as per https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/fpu/internal.h
2328        let KVM_FPU_CWD: usize = 0x37f;
2329        let KVM_FPU_MXCSR: usize = 0x1f80;
2330        let kvm = Kvm::new().unwrap();
2331        let vm = kvm.create_vm().unwrap();
2332        let vcpu = vm.create_vcpu(0).unwrap();
2333        let mut fpu: kvm_fpu = kvm_fpu {
2334            fcw: KVM_FPU_CWD as u16,
2335            mxcsr: KVM_FPU_MXCSR as u32,
2336            ..Default::default()
2337        };
2338
2339        fpu.fcw = KVM_FPU_CWD as u16;
2340        fpu.mxcsr = KVM_FPU_MXCSR as u32;
2341
2342        vcpu.set_fpu(&fpu).unwrap();
2343        assert_eq!(vcpu.get_fpu().unwrap().fcw, KVM_FPU_CWD as u16);
2344    }
2345
2346    #[cfg(target_arch = "x86_64")]
2347    #[test]
2348    fn lapic_test() {
2349        use std::io::Cursor;
2350        // We might get read of byteorder if we replace mem::transmute with something safer.
2351        use self::byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
2352        // As per https://github.com/torvalds/linux/arch/x86/kvm/lapic.c
2353        // Try to write and read the APIC_ICR (0x300) register which is non-read only and
2354        // one can simply write to it.
2355        let kvm = Kvm::new().unwrap();
2356        assert!(kvm.check_extension(Cap::Irqchip));
2357        let vm = kvm.create_vm().unwrap();
2358        // The get_lapic ioctl will fail if there is no irqchip created beforehand.
2359        vm.create_irq_chip().unwrap();
2360        let vcpu = vm.create_vcpu(0).unwrap();
2361        let mut klapic: kvm_lapic_state = vcpu.get_lapic().unwrap();
2362
2363        let reg_offset = 0x300;
2364        let value = 2_u32;
2365        //try to write and read the APIC_ICR	0x300
2366        let write_slice =
2367            unsafe { &mut *(&mut klapic.regs[reg_offset..] as *mut [i8] as *mut [u8]) };
2368        let mut writer = Cursor::new(write_slice);
2369        writer.write_u32::<LittleEndian>(value).unwrap();
2370        vcpu.set_lapic(&klapic).unwrap();
2371        klapic = vcpu.get_lapic().unwrap();
2372        let read_slice = unsafe { &*(&klapic.regs[reg_offset..] as *const [i8] as *const [u8]) };
2373        let mut reader = Cursor::new(read_slice);
2374        assert_eq!(reader.read_u32::<LittleEndian>().unwrap(), value);
2375    }
2376
2377    #[cfg(target_arch = "x86_64")]
2378    #[test]
2379    fn msrs_test() {
2380        use vmm_sys_util::fam::FamStruct;
2381        let kvm = Kvm::new().unwrap();
2382        let vm = kvm.create_vm().unwrap();
2383        let vcpu = vm.create_vcpu(0).unwrap();
2384
2385        // Set the following MSRs.
2386        let msrs_to_set = [
2387            kvm_msr_entry {
2388                index: 0x0000_0174,
2389                data: 0x0,
2390                ..Default::default()
2391            },
2392            kvm_msr_entry {
2393                index: 0x0000_0175,
2394                data: 0x1,
2395                ..Default::default()
2396            },
2397        ];
2398        let msrs_wrapper = Msrs::from_entries(&msrs_to_set).unwrap();
2399        vcpu.set_msrs(&msrs_wrapper).unwrap();
2400
2401        // Now test that GET_MSRS returns the same.
2402        // Configure the struct to say which entries we want.
2403        let mut returned_kvm_msrs = Msrs::from_entries(&[
2404            kvm_msr_entry {
2405                index: 0x0000_0174,
2406                ..Default::default()
2407            },
2408            kvm_msr_entry {
2409                index: 0x0000_0175,
2410                ..Default::default()
2411            },
2412        ])
2413        .unwrap();
2414        let nmsrs = vcpu.get_msrs(&mut returned_kvm_msrs).unwrap();
2415
2416        // Verify the lengths match.
2417        assert_eq!(nmsrs, msrs_to_set.len());
2418        assert_eq!(nmsrs, returned_kvm_msrs.as_fam_struct_ref().len());
2419
2420        // Verify the contents match.
2421        let returned_kvm_msr_entries = returned_kvm_msrs.as_slice();
2422        for (i, entry) in returned_kvm_msr_entries.iter().enumerate() {
2423            assert_eq!(entry, &msrs_to_set[i]);
2424        }
2425    }
2426
2427    #[cfg(any(
2428        target_arch = "x86_64",
2429        target_arch = "aarch64",
2430        target_arch = "riscv64",
2431        target_arch = "s390x"
2432    ))]
2433    #[test]
2434    fn mpstate_test() {
2435        let kvm = Kvm::new().unwrap();
2436        let vm = kvm.create_vm().unwrap();
2437        let vcpu = vm.create_vcpu(0).unwrap();
2438        let mp_state = vcpu.get_mp_state().unwrap();
2439        vcpu.set_mp_state(mp_state).unwrap();
2440        let other_mp_state = vcpu.get_mp_state().unwrap();
2441        assert_eq!(mp_state, other_mp_state);
2442    }
2443
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn xsave_test() {
        use vmm_sys_util::fam::FamStruct;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();
        // Round-trip the fixed-size XSAVE state: set what we just read and
        // verify the read-back region matches.
        let xsave = vcpu.get_xsave().unwrap();
        // SAFETY: Safe because no features are enabled dynamically and `xsave` is large enough.
        unsafe { vcpu.set_xsave(&xsave).unwrap() };
        let other_xsave = vcpu.get_xsave().unwrap();
        assert_eq!(&xsave.region[..], &other_xsave.region[..]);

        // check_extension_int returns the buffer size needed for XSAVE2
        // (0 when KVM_CAP_XSAVE2 is unsupported).
        let xsave_size = vm.check_extension_int(Cap::Xsave2);
        // only if KVM_CAP_XSAVE2 is supported
        if xsave_size > 0 {
            // Number of FAM entries needed beyond the fixed kvm_xsave header,
            // rounded up so the allocation covers `xsave_size` bytes.
            let fam_size = (xsave_size as usize - std::mem::size_of::<kvm_xsave>())
                .div_ceil(std::mem::size_of::<<kvm_xsave2 as FamStruct>::Entry>());
            let mut xsave2 = Xsave::new(fam_size).unwrap();
            // SAFETY: Safe because `xsave2` is allocated with enough space.
            unsafe { vcpu.get_xsave2(&mut xsave2).unwrap() };
            // The fixed-size prefix of the XSAVE2 state must match the
            // legacy KVM_GET_XSAVE result.
            assert_eq!(
                &xsave.region[..],
                &xsave2.as_fam_struct_ref().xsave.region[..]
            );
            // SAFETY: Safe because `xsave2` is allocated with enough space.
            unsafe { vcpu.set_xsave2(&xsave2).unwrap() };
        }
    }
2474
2475    #[cfg(target_arch = "x86_64")]
2476    #[test]
2477    fn xcrs_test() {
2478        let kvm = Kvm::new().unwrap();
2479        let vm = kvm.create_vm().unwrap();
2480        let vcpu = vm.create_vcpu(0).unwrap();
2481        let xcrs = vcpu.get_xcrs().unwrap();
2482        vcpu.set_xcrs(&xcrs).unwrap();
2483        let other_xcrs = vcpu.get_xcrs().unwrap();
2484        assert_eq!(xcrs, other_xcrs);
2485    }
2486
2487    #[cfg(target_arch = "x86_64")]
2488    #[test]
2489    fn debugregs_test() {
2490        let kvm = Kvm::new().unwrap();
2491        let vm = kvm.create_vm().unwrap();
2492        let vcpu = vm.create_vcpu(0).unwrap();
2493        let debugregs = vcpu.get_debug_regs().unwrap();
2494        vcpu.set_debug_regs(&debugregs).unwrap();
2495        let other_debugregs = vcpu.get_debug_regs().unwrap();
2496        assert_eq!(debugregs, other_debugregs);
2497    }
2498
2499    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
2500    #[test]
2501    fn vcpu_events_test() {
2502        let kvm = Kvm::new().unwrap();
2503        if kvm.check_extension(Cap::VcpuEvents) {
2504            let vm = kvm.create_vm().unwrap();
2505            let vcpu = vm.create_vcpu(0).unwrap();
2506            let vcpu_events = vcpu.get_vcpu_events().unwrap();
2507            vcpu.set_vcpu_events(&vcpu_events).unwrap();
2508            let other_vcpu_events = vcpu.get_vcpu_events().unwrap();
2509            assert_eq!(vcpu_events, other_vcpu_events);
2510        }
2511    }
2512
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        // End-to-end vCPU run test: loads a small aarch64 snippet into guest
        // memory, drives KVM_RUN, and checks MMIO handling, the dirty-page
        // log, and the final shutdown system event.
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        #[rustfmt::skip]
        let code = [
            0x40, 0x20, 0x80, 0x52, /* mov w0, #0x102 */
            0x00, 0x01, 0x00, 0xb9, /* str w0, [x8]; test physical memory write */
            0x81, 0x60, 0x80, 0x52, /* mov w1, #0x304 */
            0x02, 0x00, 0x80, 0x52, /* mov w2, #0x0 */
            0x20, 0x01, 0x40, 0xb9, /* ldr w0, [x9]; test MMIO read */
            0x1f, 0x18, 0x14, 0x71, /* cmp w0, #0x506 */
            0x20, 0x00, 0x82, 0x1a, /* csel w0, w1, w2, eq */
            0x20, 0x01, 0x00, 0xb9, /* str w0, [x9]; test MMIO write */
            0x00, 0x80, 0xb0, 0x52, /* mov w0, #0x84000000 */
            0x00, 0x00, 0x1d, 0x32, /* orr w0, w0, #0x08 */
            0x02, 0x00, 0x00, 0xd4, /* hvc #0x0 */
            0x00, 0x00, 0x00, 0x14, /* b <this address>; shouldn't get here, but if so loop forever */
        ];

        // Back the guest with anonymous host memory registered as slot 0,
        // with dirty-page logging enabled so the log can be checked below.
        let mem_size = 0x20000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x10000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();
        let mut kvi = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();
        // Enable PSCI 0.2 so the final `hvc #0` call surfaces as a
        // SystemEvent exit (asserted to be KVM_SYSTEM_EVENT_SHUTDOWN below).
        kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
        vcpu_fd.vcpu_init(&kvi).unwrap();

        // Base id of the aarch64 core register block; individual registers
        // are addressed as `core_reg_base + 2 * <index>`.
        // NOTE(review): encoding assumed from the KVM_REG_ARM_CORE scheme —
        // confirm against arch/arm64/include/uapi/asm/kvm.h.
        let core_reg_base: u64 = 0x6030_0000_0010_0000;
        // The MMIO window sits just past the end of the memory slot, so any
        // access there exits to userspace.
        let mmio_addr: u64 = guest_addr + mem_size as u64;

        // Set the PC to the guest address where we loaded the code.
        vcpu_fd
            .set_one_reg(core_reg_base + 2 * 32, &(guest_addr as u128).to_le_bytes())
            .unwrap();

        // Set x8 and x9 to the addresses the guest test code needs
        vcpu_fd
            .set_one_reg(
                core_reg_base + 2 * 8,
                &(guest_addr as u128 + 0x10000).to_le_bytes(),
            )
            .unwrap();
        vcpu_fd
            .set_one_reg(core_reg_base + 2 * 9, &(mmio_addr as u128).to_le_bytes())
            .unwrap();

        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // Return 0x506 (little-endian) so the guest's
                    // `cmp w0, #0x506` matches and `csel` picks w1 (0x304).
                    data[3] = 0x0;
                    data[2] = 0x0;
                    data[1] = 0x5;
                    data[0] = 0x6;
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // The guest stored 0x304, selected on the MMIO read above.
                    assert_eq!(data[3], 0x0);
                    assert_eq!(data[2], 0x0);
                    assert_eq!(data[1], 0x3);
                    assert_eq!(data[0], 0x4);
                    // The code snippet dirties one page at guest_addr + 0x10000.
                    // The code page should not be dirty, as it's not written by the guest.
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 1);
                }
                VcpuExit::SystemEvent(type_, data) => {
                    assert_eq!(type_, KVM_SYSTEM_EVENT_SHUTDOWN);
                    assert_eq!(data[0], 0);
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2618
    #[cfg(target_arch = "riscv64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        // End-to-end vCPU run test: loads a small riscv64 snippet into guest
        // memory, drives KVM_RUN, and checks MMIO handling and the
        // dirty-page log.
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        #[rustfmt::skip]
        let code = [
            0x13, 0x05, 0x50, 0x40, // li   a0, 0x0405;
            0x23, 0x20, 0xac, 0x00, // sw   a0, 0(s8);  test physical memory write
            0x03, 0xa5, 0x0c, 0x00, // lw   a0, 0(s9);  test MMIO read
            0x93, 0x05, 0x70, 0x60, // li   a1, 0x0607;
            0x23, 0xa0, 0xbc, 0x00, // sw   a1, 0(s9);  test MMIO write
            0x6f, 0x00, 0x00, 0x00, // j .; shouldn't get here, but if so loop forever
        ];

        // Back the guest with anonymous host memory registered as slot 0,
        // with dirty-page logging enabled so the log can be checked below.
        let mem_size = 0x20000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x10000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();

        // Base id of the riscv64 core register block; core registers are
        // addressed as `core_reg_base + <index>`.
        // NOTE(review): encoding assumed from the KVM_REG_RISCV_CORE scheme —
        // confirm against arch/riscv/include/uapi/asm/kvm.h.
        let core_reg_base: u64 = 0x8030_0000_0200_0000;
        // The MMIO window sits just past the end of the memory slot, so any
        // access there exits to userspace.
        let mmio_addr: u64 = guest_addr + mem_size as u64;

        // Set the PC to the guest address where we loaded the code.
        vcpu_fd
            .set_one_reg(core_reg_base, &(guest_addr as u128).to_le_bytes())
            .unwrap();

        // Set s8 and s9 to the addresses the guest test code needs
        vcpu_fd
            .set_one_reg(
                core_reg_base + 24,
                &(guest_addr as u128 + 0x10000).to_le_bytes(),
            )
            .unwrap();
        vcpu_fd
            .set_one_reg(core_reg_base + 25, &(mmio_addr as u128).to_le_bytes())
            .unwrap();

        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // Supply 0x506 (little-endian) to the guest's MMIO load.
                    data[3] = 0x0;
                    data[2] = 0x0;
                    data[1] = 0x5;
                    data[0] = 0x6;
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, mmio_addr);
                    assert_eq!(data.len(), 4);
                    // The guest stored 0x0607 (li a1, 0x0607 above).
                    assert_eq!(data[3], 0x0);
                    assert_eq!(data[2], 0x0);
                    assert_eq!(data[1], 0x6);
                    assert_eq!(data[0], 0x7);
                    // The code snippet dirties one page at guest_addr + 0x10000.
                    // The code page should not be dirty, as it's not written by the guest.
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 1);
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2710
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_run_code() {
        use std::io::Write;

        // End-to-end vCPU run test: loads a small real-mode snippet, drives
        // KVM_RUN with single-stepping enabled, and checks PIO, MMIO, debug
        // exits and the dirty-page log.
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        // This example is based on https://lwn.net/Articles/658511/
        #[rustfmt::skip]
        let code = [
            0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
            0x00, 0xd8, /* add %bl, %al */
            0x04, b'0', /* add $'0', %al */
            0xee, /* out %al, %dx */
            0xec, /* in %dx, %al */
            0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/
            0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/
            0xc6, 0x06, 0x00, 0x20, 0x00, /* movl $0, (0x2000); Dirty one page in guest mem. */
            0xf4, /* hlt */
        ];
        // RIP values after each of the first three instructions above
        // (3, 2 and 2 bytes long, starting at guest_addr = 0x1000); checked
        // on the single-step Debug exits.
        let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007];

        // Back the guest with anonymous host memory registered as slot 0,
        // with dirty-page logging enabled so the log can be checked below.
        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();
        }

        unsafe {
            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu_fd = vm.create_vcpu(0).unwrap();

        // Zero the code segment base/selector so execution starts at the
        // flat physical address placed in `rip` below.
        let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
        assert_ne!(vcpu_sregs.cs.base, 0);
        assert_ne!(vcpu_sregs.cs.selector, 0);
        vcpu_sregs.cs.base = 0;
        vcpu_sregs.cs.selector = 0;
        vcpu_fd.set_sregs(&vcpu_sregs).unwrap();

        let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
        // Set the Instruction Pointer to the guest address where we loaded the code.
        vcpu_regs.rip = guest_addr;
        // rax/rbx feed the `add %bl, %al` + `add $'0', %al` sequence:
        // 2 + 3 + '0' == '5', checked in the IoOut arm below.
        vcpu_regs.rax = 2;
        vcpu_regs.rbx = 3;
        vcpu_regs.rflags = 2;
        vcpu_fd.set_regs(&vcpu_regs).unwrap();

        // Enable single-stepping so each early instruction produces a
        // VcpuExit::Debug we can assert on.
        let mut debug_struct = kvm_guest_debug {
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
            pad: 0,
            arch: kvm_guest_debug_arch {
                debugreg: [0, 0, 0, 0, 0, 0, 0, 0],
            },
        };
        vcpu_fd.set_guest_debug(&debug_struct).unwrap();

        let mut instr_idx = 0;
        loop {
            match vcpu_fd.run().expect("run failed") {
                VcpuExit::IoIn(addr, data) => {
                    assert_eq!(addr, 0x3f8);
                    assert_eq!(data.len(), 1);
                }
                VcpuExit::IoOut(addr, data) => {
                    assert_eq!(addr, 0x3f8);
                    assert_eq!(data.len(), 1);
                    // al = 2 + 3 + '0' (see register setup above).
                    assert_eq!(data[0], b'5');
                }
                VcpuExit::MmioRead(addr, data) => {
                    assert_eq!(addr, 0x8000);
                    assert_eq!(data.len(), 1);
                }
                VcpuExit::MmioWrite(addr, data) => {
                    assert_eq!(addr, 0x8000);
                    assert_eq!(data.len(), 1);
                    assert_eq!(data[0], 0);
                }
                VcpuExit::Debug(debug) => {
                    if instr_idx == expected_rips.len() - 1 {
                        // Disabling debugging/single-stepping
                        debug_struct.control = 0;
                        vcpu_fd.set_guest_debug(&debug_struct).unwrap();
                    } else if instr_idx >= expected_rips.len() {
                        unreachable!();
                    }
                    let vcpu_regs = vcpu_fd.get_regs().unwrap();
                    assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]);
                    assert_eq!(debug.exception, 1);
                    assert_eq!(debug.pc, expected_rips[instr_idx]);
                    // Check first 15 bits of DR6
                    let mask = (1 << 16) - 1;
                    assert_eq!(debug.dr6 & mask, 0b100111111110000);
                    // Bit 10 in DR7 is always 1
                    assert_eq!(debug.dr7, 1 << 10);
                    instr_idx += 1;
                }
                VcpuExit::Hlt => {
                    // The code snippet dirties 2 pages:
                    // * one when the code itself is loaded in memory;
                    // * and one more from the `movl` that writes to address 0x8000
                    let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
                    let dirty_pages: u32 = dirty_pages_bitmap
                        .into_iter()
                        .map(|page| page.count_ones())
                        .sum();
                    assert_eq!(dirty_pages, 2);
                    break;
                }
                r => panic!("unexpected exit reason: {:?}", r),
            }
        }
    }
2837
2838    #[test]
2839    #[cfg(target_arch = "aarch64")]
2840    fn test_get_preferred_target() {
2841        let kvm = Kvm::new().unwrap();
2842        let vm = kvm.create_vm().unwrap();
2843        let vcpu = vm.create_vcpu(0).unwrap();
2844
2845        let mut kvi = kvm_vcpu_init::default();
2846
2847        vm.get_preferred_target(&mut kvi)
2848            .expect("Cannot get preferred target");
2849        vcpu.vcpu_init(&kvi).unwrap();
2850    }
2851
2852    #[test]
2853    #[cfg(target_arch = "aarch64")]
2854    fn test_set_one_reg() {
2855        let kvm = Kvm::new().unwrap();
2856        let vm = kvm.create_vm().unwrap();
2857        let vcpu = vm.create_vcpu(0).unwrap();
2858
2859        let mut kvi = kvm_vcpu_init::default();
2860        vm.get_preferred_target(&mut kvi)
2861            .expect("Cannot get preferred target");
2862        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2863        let data: u128 = 0;
2864        let reg_id: u64 = 0;
2865
2866        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
2867        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the PSTATE register (which is a
2868        // specific aarch64 register).
2869        // This regiseter is 64 bit wide (8 bytes).
2870        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
2871        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
2872            .expect("Failed to set pstate register");
2873
2874        // Trying to set 8 byte register with 7 bytes must fail.
2875        vcpu.set_one_reg(PSTATE_REG_ID, &[0_u8; 7]).unwrap_err();
2876    }
2877
2878    #[test]
2879    #[cfg(target_arch = "aarch64")]
2880    fn test_get_one_reg() {
2881        let kvm = Kvm::new().unwrap();
2882        let vm = kvm.create_vm().unwrap();
2883        let vcpu = vm.create_vcpu(0).unwrap();
2884
2885        let mut kvi = kvm_vcpu_init::default();
2886        vm.get_preferred_target(&mut kvi)
2887            .expect("Cannot get preferred target");
2888        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2889
2890        // PSR (Processor State Register) bits.
2891        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
2892        const PSR_MODE_EL1H: u64 = 0x0000_0005;
2893        const PSR_F_BIT: u64 = 0x0000_0040;
2894        const PSR_I_BIT: u64 = 0x0000_0080;
2895        const PSR_A_BIT: u64 = 0x0000_0100;
2896        const PSR_D_BIT: u64 = 0x0000_0200;
2897        const PSTATE_FAULT_BITS_64: u64 =
2898            PSR_MODE_EL1H | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
2899        let data: u128 = PSTATE_FAULT_BITS_64 as u128;
2900        const PSTATE_REG_ID: u64 = 0x6030_0000_0010_0042;
2901        vcpu.set_one_reg(PSTATE_REG_ID, &data.to_le_bytes())
2902            .expect("Failed to set pstate register");
2903
2904        let mut bytes = [0_u8; 16];
2905        vcpu.get_one_reg(PSTATE_REG_ID, &mut bytes)
2906            .expect("Failed to get pstate register");
2907        let data = u128::from_le_bytes(bytes);
2908        assert_eq!(data, PSTATE_FAULT_BITS_64 as u128);
2909
2910        // Trying to get 8 byte register with 7 bytes must fail.
2911        vcpu.get_one_reg(PSTATE_REG_ID, &mut [0_u8; 7]).unwrap_err();
2912    }
2913
2914    #[test]
2915    #[cfg(target_arch = "aarch64")]
2916    fn test_get_reg_list() {
2917        let kvm = Kvm::new().unwrap();
2918        let vm = kvm.create_vm().unwrap();
2919        let vcpu = vm.create_vcpu(0).unwrap();
2920
2921        let mut reg_list = RegList::new(1).unwrap();
2922        // KVM_GET_REG_LIST demands that the vcpus be initalized, so we expect this to fail.
2923        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
2924        assert!(err.errno() == libc::ENOEXEC);
2925
2926        let mut kvi = kvm_vcpu_init::default();
2927        vm.get_preferred_target(&mut kvi)
2928            .expect("Cannot get preferred target");
2929        vcpu.vcpu_init(&kvi).expect("Cannot initialize vcpu");
2930
2931        // KVM_GET_REG_LIST offers us a number of registers for which we have
2932        // not allocated memory, so the first time it fails.
2933        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
2934        assert!(err.errno() == libc::E2BIG);
2935        // SAFETY: This structure is a result from a specific vCPU ioctl
2936        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
2937
2938        // We make use of the number of registers returned to allocate memory and
2939        // try one more time.
2940        // SAFETY: This structure is a result from a specific vCPU ioctl
2941        let mut reg_list =
2942            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
2943        vcpu.get_reg_list(&mut reg_list).unwrap()
2944    }
2945
2946    #[test]
2947    #[cfg(target_arch = "riscv64")]
2948    fn test_set_one_reg() {
2949        let kvm = Kvm::new().unwrap();
2950        let vm = kvm.create_vm().unwrap();
2951        let vcpu = vm.create_vcpu(0).unwrap();
2952
2953        let data: u128 = 0;
2954        let reg_id: u64 = 0;
2955
2956        vcpu.set_one_reg(reg_id, &data.to_le_bytes()).unwrap_err();
2957        // Exercising KVM_SET_ONE_REG by trying to alter the data inside the A0
2958        // register.
2959        // This regiseter is 64 bit wide (8 bytes).
2960        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
2961        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
2962            .expect("Failed to set a0 register");
2963
2964        // Trying to set 8 byte register with 7 bytes must fail.
2965        vcpu.set_one_reg(A0_REG_ID, &[0_u8; 7]).unwrap_err();
2966    }
2967
2968    #[test]
2969    #[cfg(target_arch = "riscv64")]
2970    fn test_get_one_reg() {
2971        let kvm = Kvm::new().unwrap();
2972        let vm = kvm.create_vm().unwrap();
2973        let vcpu = vm.create_vcpu(0).unwrap();
2974
2975        const PRESET: u64 = 0x7;
2976        let data: u128 = PRESET as u128;
2977        const A0_REG_ID: u64 = 0x8030_0000_0200_000a;
2978        vcpu.set_one_reg(A0_REG_ID, &data.to_le_bytes())
2979            .expect("Failed to set a0 register");
2980
2981        let mut bytes = [0_u8; 16];
2982        vcpu.get_one_reg(A0_REG_ID, &mut bytes)
2983            .expect("Failed to get a0 register");
2984        let data = u128::from_le_bytes(bytes);
2985        assert_eq!(data, PRESET as u128);
2986
2987        // Trying to get 8 byte register with 7 bytes must fail.
2988        vcpu.get_one_reg(A0_REG_ID, &mut [0_u8; 7]).unwrap_err();
2989    }
2990
2991    #[test]
2992    #[cfg(target_arch = "riscv64")]
2993    fn test_get_reg_list() {
2994        let kvm = Kvm::new().unwrap();
2995        let vm = kvm.create_vm().unwrap();
2996        let vcpu = vm.create_vcpu(0).unwrap();
2997
2998        let mut reg_list = RegList::new(1).unwrap();
2999
3000        // KVM_GET_REG_LIST offers us a number of registers for which we have
3001        // not allocated memory, so the first time it fails.
3002        let err = vcpu.get_reg_list(&mut reg_list).unwrap_err();
3003        assert!(err.errno() == libc::E2BIG);
3004        // SAFETY: This structure is a result from a specific vCPU ioctl
3005        assert!(unsafe { reg_list.as_mut_fam_struct() }.n > 0);
3006
3007        // We make use of the number of registers returned to allocate memory and
3008        // try one more time.
3009        // SAFETY: This structure is a result from a specific vCPU ioctl
3010        let mut reg_list =
3011            RegList::new(unsafe { reg_list.as_mut_fam_struct() }.n as usize).unwrap();
3012        vcpu.get_reg_list(&mut reg_list).unwrap();
3013
3014        // Test get a register list contains 200 registers explicitly
3015        let mut reg_list = RegList::new(200).unwrap();
3016        vcpu.get_reg_list(&mut reg_list).unwrap();
3017    }
3018
3019    #[test]
3020    fn test_get_kvm_run() {
3021        let kvm = Kvm::new().unwrap();
3022        let vm = kvm.create_vm().unwrap();
3023        let mut vcpu = vm.create_vcpu(0).unwrap();
3024        vcpu.kvm_run_ptr.as_mut_ref().immediate_exit = 1;
3025        assert_eq!(vcpu.get_kvm_run().immediate_exit, 1);
3026    }
3027
3028    #[test]
3029    fn test_set_kvm_immediate_exit() {
3030        let kvm = Kvm::new().unwrap();
3031        let vm = kvm.create_vm().unwrap();
3032        let mut vcpu = vm.create_vcpu(0).unwrap();
3033        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 0);
3034        vcpu.set_kvm_immediate_exit(1);
3035        assert_eq!(vcpu.kvm_run_ptr.as_ref().immediate_exit, 1);
3036    }
3037
3038    #[test]
3039    #[cfg(target_arch = "x86_64")]
3040    fn test_enable_cap() {
3041        let kvm = Kvm::new().unwrap();
3042        let vm = kvm.create_vm().unwrap();
3043        let mut cap = kvm_enable_cap {
3044            // KVM_CAP_HYPERV_SYNIC needs KVM_CAP_SPLIT_IRQCHIP enabled
3045            cap: KVM_CAP_SPLIT_IRQCHIP,
3046            ..Default::default()
3047        };
3048        cap.args[0] = 24;
3049        vm.enable_cap(&cap).unwrap();
3050
3051        let vcpu = vm.create_vcpu(0).unwrap();
3052        if kvm.check_extension(Cap::HypervSynic) {
3053            let cap = kvm_enable_cap {
3054                cap: KVM_CAP_HYPERV_SYNIC,
3055                ..Default::default()
3056            };
3057            vcpu.enable_cap(&cap).unwrap();
3058        }
3059    }
3060    #[cfg(target_arch = "x86_64")]
3061    #[test]
3062    fn test_get_tsc_khz() {
3063        let kvm = Kvm::new().unwrap();
3064        let vm = kvm.create_vm().unwrap();
3065        let vcpu = vm.create_vcpu(0).unwrap();
3066
3067        if !kvm.check_extension(Cap::GetTscKhz) {
3068            vcpu.get_tsc_khz().unwrap_err();
3069        } else {
3070            assert!(vcpu.get_tsc_khz().unwrap() > 0);
3071        }
3072    }
3073
3074    #[cfg(target_arch = "x86_64")]
3075    #[test]
3076    fn test_set_tsc_khz() {
3077        let kvm = Kvm::new().unwrap();
3078        let vm = kvm.create_vm().unwrap();
3079        let vcpu = vm.create_vcpu(0).unwrap();
3080        let freq = vcpu.get_tsc_khz().unwrap();
3081
3082        if !(kvm.check_extension(Cap::GetTscKhz) && kvm.check_extension(Cap::TscControl)) {
3083            vcpu.set_tsc_khz(0).unwrap_err();
3084        } else {
3085            vcpu.set_tsc_khz(freq - 500000).unwrap();
3086            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq - 500000);
3087            vcpu.set_tsc_khz(freq + 500000).unwrap();
3088            assert_eq!(vcpu.get_tsc_khz().unwrap(), freq + 500000);
3089        }
3090    }
3091
3092    #[cfg(target_arch = "x86_64")]
3093    #[test]
3094    fn test_sync_regs() {
3095        let kvm = Kvm::new().unwrap();
3096        let vm = kvm.create_vm().unwrap();
3097        let mut vcpu = vm.create_vcpu(0).unwrap();
3098
3099        // Test setting each valid register
3100        let sync_regs = [
3101            SyncReg::Register,
3102            SyncReg::SystemRegister,
3103            SyncReg::VcpuEvents,
3104        ];
3105        for reg in &sync_regs {
3106            vcpu.set_sync_valid_reg(*reg);
3107            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, *reg as u64);
3108            vcpu.clear_sync_valid_reg(*reg);
3109            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_valid_regs, 0);
3110        }
3111
3112        // Test that multiple valid SyncRegs can be set at the same time
3113        vcpu.set_sync_valid_reg(SyncReg::Register);
3114        vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
3115        vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
3116        assert_eq!(
3117            vcpu.kvm_run_ptr.as_ref().kvm_valid_regs,
3118            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
3119        );
3120
3121        // Test setting each dirty register
3122        let sync_regs = [
3123            SyncReg::Register,
3124            SyncReg::SystemRegister,
3125            SyncReg::VcpuEvents,
3126        ];
3127
3128        for reg in &sync_regs {
3129            vcpu.set_sync_dirty_reg(*reg);
3130            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, *reg as u64);
3131            vcpu.clear_sync_dirty_reg(*reg);
3132            assert_eq!(vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs, 0);
3133        }
3134
3135        // Test that multiple dirty SyncRegs can be set at the same time
3136        vcpu.set_sync_dirty_reg(SyncReg::Register);
3137        vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
3138        vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);
3139        assert_eq!(
3140            vcpu.kvm_run_ptr.as_ref().kvm_dirty_regs,
3141            SyncReg::Register as u64 | SyncReg::SystemRegister as u64 | SyncReg::VcpuEvents as u64
3142        );
3143    }
3144
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_sync_regs_with_run() {
        // End-to-end sync_regs test: seed guest register state through the
        // kvm_run sync_regs area (no KVM_SET_REGS/SREGS ioctls), run one
        // `inc eax; hlt`, and verify KVM wrote the updated state back.
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        if kvm.check_extension(Cap::SyncRegs) {
            // This example is based on https://lwn.net/Articles/658511/
            #[rustfmt::skip]
            let code = [
                0xff, 0xc0, /* inc eax */
                0xf4, /* hlt */
            ];

            let mem_size = 0x4000;
            let load_addr = mmap_anonymous(mem_size).as_ptr();
            let guest_addr: u64 = 0x1000;
            let slot: u32 = 0;
            let mem_region = kvm_userspace_memory_region {
                slot,
                guest_phys_addr: guest_addr,
                memory_size: mem_size as u64,
                userspace_addr: load_addr as u64,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            unsafe {
                vm.set_user_memory_region(mem_region).unwrap();
            }

            unsafe {
                // Get a mutable slice of `mem_size` from `load_addr`.
                // This is safe because we mapped it before.
                let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
                slice.write_all(&code).unwrap();
            }

            let mut vcpu = vm.create_vcpu(0).unwrap();

            let orig_sregs = vcpu.get_sregs().unwrap();

            let sync_regs = vcpu.sync_regs_mut();

            // Initialize the sregs in sync_regs to be the original sregs
            sync_regs.sregs = orig_sregs;
            // Flat real-mode addressing so RIP == guest physical address.
            sync_regs.sregs.cs.base = 0;
            sync_regs.sregs.cs.selector = 0;

            // Set up the guest to attempt to `inc rax`
            sync_regs.regs.rip = guest_addr;
            sync_regs.regs.rax = 0x8000;
            // Bit 1 of RFLAGS is architecturally reserved and must be set.
            sync_regs.regs.rflags = 2;

            // Initialize the sync_reg flags: mark everything valid (we
            // provide the state) and dirty (KVM must load it before run).
            vcpu.set_sync_valid_reg(SyncReg::Register);
            vcpu.set_sync_valid_reg(SyncReg::SystemRegister);
            vcpu.set_sync_valid_reg(SyncReg::VcpuEvents);
            vcpu.set_sync_dirty_reg(SyncReg::Register);
            vcpu.set_sync_dirty_reg(SyncReg::SystemRegister);
            vcpu.set_sync_dirty_reg(SyncReg::VcpuEvents);

            // hlt is the only expected return from guest execution
            assert!(matches!(vcpu.run().expect("run failed"), VcpuExit::Hlt));

            let regs = vcpu.get_regs().unwrap();

            // The state KVM synced back must match KVM_GET_REGS, and the
            // guest's single `inc eax` must be visible (0x8000 -> 0x8001).
            let sync_regs = vcpu.sync_regs();
            assert_eq!(regs, sync_regs.regs);
            assert_eq!(sync_regs.regs.rax, 0x8001);
        }
    }
3216
3217    #[test]
3218    #[cfg(target_arch = "x86_64")]
3219    fn test_translate_gva() {
3220        let kvm = Kvm::new().unwrap();
3221        let vm = kvm.create_vm().unwrap();
3222        let vcpu = vm.create_vcpu(0).unwrap();
3223        vcpu.translate_gva(0x10000).unwrap();
3224        assert_eq!(vcpu.translate_gva(0x10000).unwrap().valid, 1);
3225        assert_eq!(
3226            vcpu.translate_gva(0x10000).unwrap().physical_address,
3227            0x10000
3228        );
3229        vcpu.translate_gva(u64::MAX).unwrap();
3230        assert_eq!(vcpu.translate_gva(u64::MAX).unwrap().valid, 0);
3231    }
3232
    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_vcpu_attr() {
        // PMU device attributes must be rejected before the vCPU is
        // initialized with the PMUv3 feature, and accepted afterwards.
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();

        let dist_attr = kvm_device_attr {
            group: KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };

        // The vCPU has not been initialized with PMUv3 yet, so both
        // queries must fail.
        vcpu.has_device_attr(&dist_attr).unwrap_err();
        vcpu.set_device_attr(&dist_attr).unwrap_err();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi)
            .expect("Cannot get preferred target");
        // Enable PSCI 0.2 and the PMUv3 feature before init.
        kvi.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2) | (1 << KVM_ARM_VCPU_PMU_V3);
        vcpu.vcpu_init(&kvi).unwrap();
        // With PMUv3 enabled, the same attribute is now available.
        vcpu.has_device_attr(&dist_attr).unwrap();
        vcpu.set_device_attr(&dist_attr).unwrap();
    }
3257
3258    #[test]
3259    #[cfg(target_arch = "aarch64")]
3260    fn test_pointer_authentication() {
3261        let kvm = Kvm::new().unwrap();
3262        let vm = kvm.create_vm().unwrap();
3263        let vcpu = vm.create_vcpu(0).unwrap();
3264
3265        let mut kvi = kvm_vcpu_init::default();
3266        vm.get_preferred_target(&mut kvi)
3267            .expect("Cannot get preferred target");
3268        if kvm.check_extension(Cap::ArmPtrAuthAddress) {
3269            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
3270        }
3271        if kvm.check_extension(Cap::ArmPtrAuthGeneric) {
3272            kvi.features[0] |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
3273        }
3274        vcpu.vcpu_init(&kvi).unwrap();
3275    }
3276
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_userspace_rdmsr_exit() {
        // Verifies that a guest `rdmsr` of an MSR unknown to KVM triggers
        // a KVM_EXIT_X86_RDMSR userspace exit once KVM_CAP_X86_USER_SPACE_MSR
        // is enabled with the "unknown MSR" filter.
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        #[rustfmt::skip]
        let code = [
            0x0F, 0x32, /* rdmsr */
            0xF4        /* hlt */
        ];

        if !vm.check_extension(Cap::X86UserSpaceMsr) {
            return;
        }
        // Forward only accesses to MSRs KVM itself does not handle.
        let cap = kvm_enable_cap {
            cap: Cap::X86UserSpaceMsr as u32,
            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
            ..Default::default()
        };
        vm.enable_cap(&cap).unwrap();

        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: 0,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();

            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu = vm.create_vcpu(0).unwrap();

        // Set up special registers for flat real-mode execution.
        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
        assert_ne!(vcpu_sregs.cs.base, 0);
        assert_ne!(vcpu_sregs.cs.selector, 0);
        vcpu_sregs.cs.base = 0;
        vcpu_sregs.cs.selector = 0;
        vcpu.set_sregs(&vcpu_sregs).unwrap();

        // Set the Instruction Pointer to the guest address where we loaded
        // the code, and RCX to the MSR to be read.
        // 0x474f4f00 is an arbitrary index with no KVM handler, so the
        // access is guaranteed to hit the "unknown" filter.
        let mut vcpu_regs = vcpu.get_regs().unwrap();
        vcpu_regs.rip = guest_addr;
        vcpu_regs.rcx = 0x474f4f00;
        vcpu.set_regs(&vcpu_regs).unwrap();

        match vcpu.run().unwrap() {
            VcpuExit::X86Rdmsr(exit) => {
                assert_eq!(exit.reason, MsrExitReason::Unknown);
                assert_eq!(exit.index, 0x474f4f00);
            }
            e => panic!("Unexpected exit: {:?}", e),
        }
    }
3345
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_userspace_hypercall_exit() {
        // Verifies that a KVM_HC_MAP_GPA_RANGE hypercall from the guest is
        // forwarded to userspace as a KVM_EXIT_HYPERCALL once
        // KVM_CAP_EXIT_HYPERCALL is enabled for that hypercall number.
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();

        // Use `vmcall` or `vmmcall` depending on what's supported.
        // CPUID leaf 1 ECX bit 5 => VMX (vmcall);
        // leaf 0x8000_0001 ECX bit 2 => SVM (vmmcall).
        let cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
        let supports_vmcall = cpuid
            .as_slice()
            .iter()
            .find(|entry| entry.function == 1)
            .is_some_and(|entry| entry.ecx & (1 << 5) != 0);
        let supports_vmmcall = cpuid
            .as_slice()
            .iter()
            .find(|entry| entry.function == 0x8000_0001)
            .is_some_and(|entry| entry.ecx & (1 << 2) != 0);
        #[rustfmt::skip]
        let code = if supports_vmcall {
            [
                0x0F, 0x01, 0xC1, /* vmcall */
                0xF4              /* hlt */
            ]
        } else if supports_vmmcall {
            [
                0x0F, 0x01, 0xD9, /* vmmcall */
                0xF4              /* hlt */
            ]
        } else {
            return;
        };

        if !vm.check_extension(Cap::ExitHypercall) {
            return;
        }
        const KVM_HC_MAP_GPA_RANGE: u64 = 12;
        // args[0] is a bitmask of hypercall numbers to forward to
        // userspace instead of handling in-kernel.
        let cap = kvm_enable_cap {
            cap: Cap::ExitHypercall as u32,
            args: [1 << KVM_HC_MAP_GPA_RANGE, 0, 0, 0],
            ..Default::default()
        };
        vm.enable_cap(&cap).unwrap();

        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: 0,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();

            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu = vm.create_vcpu(0).unwrap();

        // Set up special registers for flat real-mode execution.
        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
        assert_ne!(vcpu_sregs.cs.base, 0);
        assert_ne!(vcpu_sregs.cs.selector, 0);
        vcpu_sregs.cs.base = 0;
        vcpu_sregs.cs.selector = 0;
        vcpu.set_sregs(&vcpu_sregs).unwrap();

        // Set the Instruction Pointer to the guest address where we loaded
        // the code, and the hypercall arguments: RAX = hypercall number,
        // RBX/RCX/RDX = args[0..3] (start GPA, page count, attributes).
        let mut vcpu_regs = vcpu.get_regs().unwrap();
        vcpu_regs.rip = guest_addr;
        vcpu_regs.rax = KVM_HC_MAP_GPA_RANGE;
        vcpu_regs.rbx = 0x1234000;
        vcpu_regs.rcx = 1;
        vcpu_regs.rdx = 0;
        vcpu.set_regs(&vcpu_regs).unwrap();

        match vcpu.run().unwrap() {
            VcpuExit::Hypercall(exit) => {
                assert_eq!(exit.nr, KVM_HC_MAP_GPA_RANGE);
                assert_eq!(exit.args[0], 0x1234000);
                assert_eq!(exit.args[1], 1);
                assert_eq!(exit.args[2], 0);
            }
            e => panic!("Unexpected exit: {:?}", e),
        }
    }
3442
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_userspace_wrmsr_exit() {
        // Verifies that a guest `wrmsr` to an MSR unknown to KVM triggers
        // a KVM_EXIT_X86_WRMSR userspace exit carrying the index and the
        // 64-bit EDX:EAX payload.
        use std::io::Write;

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        #[rustfmt::skip]
        let code = [
            0x0F, 0x30, /* wrmsr */
            0xF4        /* hlt */
        ];

        if !vm.check_extension(Cap::X86UserSpaceMsr) {
            return;
        }
        // Forward only accesses to MSRs KVM itself does not handle.
        let cap = kvm_enable_cap {
            cap: Cap::X86UserSpaceMsr as u32,
            args: [MsrExitReason::Unknown.bits() as u64, 0, 0, 0],
            ..Default::default()
        };
        vm.enable_cap(&cap).unwrap();

        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: 0,
        };
        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();

            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let mut vcpu = vm.create_vcpu(0).unwrap();

        // Set up special registers for flat real-mode execution.
        let mut vcpu_sregs = vcpu.get_sregs().unwrap();
        assert_ne!(vcpu_sregs.cs.base, 0);
        assert_ne!(vcpu_sregs.cs.selector, 0);
        vcpu_sregs.cs.base = 0;
        vcpu_sregs.cs.selector = 0;
        vcpu.set_sregs(&vcpu_sregs).unwrap();

        // Set the Instruction Pointer to the guest address where we loaded
        // the code, RCX to the MSR to be written, and EDX:EAX to the data to
        // be written.
        let mut vcpu_regs = vcpu.get_regs().unwrap();
        vcpu_regs.rip = guest_addr;
        vcpu_regs.rcx = 0x474f4f00;
        vcpu_regs.rax = 0xdeadbeef;
        vcpu_regs.rdx = 0xd0c0ffee;
        vcpu.set_regs(&vcpu_regs).unwrap();

        match vcpu.run().unwrap() {
            VcpuExit::X86Wrmsr(exit) => {
                assert_eq!(exit.reason, MsrExitReason::Unknown);
                assert_eq!(exit.index, 0x474f4f00);
                // Low half comes from EAX, high half from EDX.
                assert_eq!(exit.data & 0xffffffff, 0xdeadbeef);
                assert_eq!((exit.data >> 32) & 0xffffffff, 0xd0c0ffee);
            }
            e => panic!("Unexpected exit: {:?}", e),
        }
    }
3516
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_coalesced_pio() {
        // Verifies coalesced PIO: while the port is registered, an `out`
        // lands in the coalesced-MMIO ring instead of causing an exit;
        // after unregistering, the same `out` produces a normal IoOut exit.
        use crate::IoEventAddress;
        use std::io::Write;

        const PORT: u64 = 0x2c;
        const DATA: u64 = 0x39;
        const SIZE: u32 = 1;

        #[rustfmt::skip]
        let code = [
            0xe6, 0x2c,   // out 0x2c, al
            0xf4,         // hlt
            0xe6, 0x2c,   // out 0x2c, al
            0xf4,         // hlt
        ];

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        assert!(vm.check_extension(Cap::CoalescedPio));

        // Prepare guest memory
        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: 0,
        };

        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();

            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        // Register the port for coalescing (Pio address variant).
        let addr = IoEventAddress::Pio(PORT);
        vm.register_coalesced_mmio(addr, SIZE).unwrap();

        let mut vcpu = vm.create_vcpu(0).unwrap();

        // Map the MMIO ring
        vcpu.map_coalesced_mmio_ring().unwrap();

        // Set regs: start at the loaded code with AL holding DATA.
        let mut regs = vcpu.get_regs().unwrap();
        regs.rip = guest_addr;
        regs.rax = DATA;
        // Bit 1 of RFLAGS is architecturally reserved and must be set.
        regs.rflags = 2;
        vcpu.set_regs(&regs).unwrap();

        // Set sregs for flat real-mode addressing.
        let mut sregs = vcpu.get_sregs().unwrap();
        sregs.cs.base = 0;
        sregs.cs.selector = 0;
        vcpu.set_sregs(&sregs).unwrap();

        // Run and check that the exit was caused by the hlt and not the port
        // I/O
        let exit = vcpu.run().unwrap();
        assert!(matches!(exit, VcpuExit::Hlt));

        // Check that the ring buffer entry is what we expect
        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
        assert_eq!(entry.phys_addr, PORT);
        assert_eq!(entry.len, 1);
        assert_eq!(entry.data[0] as u64, DATA);
        // SAFETY: this field is a u32 in all variants of the union,
        // so access is always safe.
        let pio = unsafe { entry.__bindgen_anon_1.pio };
        // pio == 1 marks the entry as a port-I/O (not MMIO) access.
        assert_eq!(pio, 1);

        // The ring buffer should be empty now
        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());

        // Unregister and check that the next PIO write triggers an exit
        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
        let exit = vcpu.run().unwrap();
        let VcpuExit::IoOut(port, data) = exit else {
            panic!("Unexpected VM exit: {:?}", exit);
        };
        assert_eq!(port, PORT as u16);
        assert_eq!(data, (DATA as u8).to_le_bytes());
    }
3609
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_coalesced_mmio() {
        // Verifies coalesced MMIO: while the address range is registered,
        // a guest word store lands in the coalesced ring instead of causing
        // an exit; after unregistering, the same store exits with MmioWrite.
        use crate::IoEventAddress;
        use std::io::Write;

        const ADDR: u64 = 0x124;
        const DATA: u64 = 0x39;
        const SIZE: u32 = 2;

        // The guest stores AX (16 bits) to [DI] with DI = ADDR, twice,
        // halting after each store.
        #[rustfmt::skip]
        let code = [
            0x66, 0x31, 0xFF,        // xor di,di
            0x66, 0xBF, 0x24, 0x01,  // mov di, 0x124
            0x67, 0x66, 0x89, 0x05,  // mov WORD PTR [di], ax
            0xF4,                    // hlt
            0x66, 0x31, 0xFF,        // xor di,di
            0x66, 0xBF, 0x24, 0x01,  // mov di, 0x124
            0x67, 0x66, 0x89, 0x05,  // mov WORD PTR [di], ax
            0xF4,                    // hlt
        ];

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        assert!(vm.check_extension(Cap::CoalescedMmio));

        // Prepare guest memory. ADDR (0x124) is below guest_addr, so it is
        // outside the memory slot and stores to it are MMIO accesses.
        let mem_size = 0x4000;
        let load_addr = mmap_anonymous(mem_size).as_ptr();
        let guest_addr: u64 = 0x1000;
        let slot: u32 = 0;
        let mem_region = kvm_userspace_memory_region {
            slot,
            guest_phys_addr: guest_addr,
            memory_size: mem_size as u64,
            userspace_addr: load_addr as u64,
            flags: 0,
        };

        unsafe {
            vm.set_user_memory_region(mem_region).unwrap();

            // Get a mutable slice of `mem_size` from `load_addr`.
            // This is safe because we mapped it before.
            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
            slice.write_all(&code).unwrap();
        }

        let addr = IoEventAddress::Mmio(ADDR);
        vm.register_coalesced_mmio(addr, SIZE).unwrap();

        let mut vcpu = vm.create_vcpu(0).unwrap();

        // Map the MMIO ring
        vcpu.map_coalesced_mmio_ring().unwrap();

        // Set regs: start at the loaded code with AX holding DATA.
        // NOTE(review): rdx is also set to ADDR here, but the code snippet
        // addresses memory through DI, so this looks redundant.
        let mut regs = vcpu.get_regs().unwrap();
        regs.rip = guest_addr;
        regs.rax = DATA;
        regs.rdx = ADDR;
        // Bit 1 of RFLAGS is architecturally reserved and must be set.
        regs.rflags = 2;
        vcpu.set_regs(&regs).unwrap();

        // Set sregs for flat real-mode addressing.
        let mut sregs = vcpu.get_sregs().unwrap();
        sregs.cs.base = 0;
        sregs.cs.selector = 0;
        vcpu.set_sregs(&sregs).unwrap();

        // Run and check that the exit was caused by the hlt and not the MMIO
        // access
        let exit = vcpu.run().unwrap();
        assert!(matches!(exit, VcpuExit::Hlt));

        // Check that the ring buffer entry is what we expect
        let entry = vcpu.coalesced_mmio_read().unwrap().unwrap();
        assert_eq!(entry.phys_addr, ADDR);
        assert_eq!(entry.len, SIZE);
        assert_eq!(entry.data[0] as u64, DATA);
        // SAFETY: this field is a u32 in all variants of the union,
        // so access is always safe.
        let pio = unsafe { entry.__bindgen_anon_1.pio };
        // pio == 0 marks the entry as an MMIO (not port-I/O) access.
        assert_eq!(pio, 0);

        // The ring buffer should be empty now
        assert!(vcpu.coalesced_mmio_read().unwrap().is_none());

        // Unregister and check that the next MMIO write triggers an exit
        vm.unregister_coalesced_mmio(addr, SIZE).unwrap();
        let exit = vcpu.run().unwrap();
        let VcpuExit::MmioWrite(addr, data) = exit else {
            panic!("Unexpected VM exit: {:?}", exit);
        };
        assert_eq!(addr, ADDR);
        assert_eq!(data, (DATA as u16).to_le_bytes());
    }
3707
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_get_and_set_nested_state() {
        // Round-trips KVM_GET_NESTED_STATE / KVM_SET_NESTED_STATE through
        // the pre-allocated KvmNestedStateBuffer helper type.
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0).unwrap();

        // Ensure that KVM also during runtime never wants more memory than we have pre-allocated
        // by the helper type. KVM is expected to report:
        // - 128+4096==4224 on SVM
        // - 128+8192==8320 on VMX
        let kvm_nested_state_size = kvm.check_extension_int(Cap::NestedState) as usize;
        assert!(kvm_nested_state_size <= size_of::<KvmNestedStateBuffer>());

        let mut state_buffer = KvmNestedStateBuffer::default();
        // Ensure that header shows full buffer length.
        assert_eq!(
            state_buffer.size as usize,
            size_of::<KvmNestedStateBuffer>()
        );

        vcpu.nested_state(&mut state_buffer).unwrap();
        let old_state = state_buffer;

        // There is no nested guest in this test, so there is no payload.
        // KVM shrinks `size` to just the header in that case.
        assert_eq!(state_buffer.size as usize, size_of::<kvm_nested_state>());

        // Feeding the state we just read back in must be accepted.
        vcpu.set_nested_state(&old_state).unwrap();
    }
3737}