vfio_ioctls/
vfio_device.rs

1// Copyright © 2019 Intel Corporation
2// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
3//
4// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
5
6use std::any::Any;
7use std::collections::HashMap;
8use std::ffi::CString;
9use std::fs::{File, OpenOptions};
10use std::mem::{self, ManuallyDrop};
11use std::os::unix::io::{AsRawFd, RawFd};
12use std::os::unix::prelude::FileExt;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, Mutex};
15
16use byteorder::{ByteOrder, NativeEndian};
17use log::{debug, error, warn};
18use vfio_bindings::bindings::vfio::*;
19use vm_memory::{Address, GuestMemory, GuestMemoryRegion, MemoryRegionAddress};
20use vmm_sys_util::eventfd::EventFd;
21
22use crate::fam::vec_with_array_field;
23use crate::vfio_ioctls::*;
24use crate::{Result, VfioError};
25#[cfg(all(feature = "kvm", not(test)))]
26use kvm_bindings::{
27    kvm_device_attr, KVM_DEV_VFIO_FILE, KVM_DEV_VFIO_FILE_ADD, KVM_DEV_VFIO_FILE_DEL,
28};
29#[cfg(all(feature = "kvm", not(test)))]
30use kvm_ioctls::DeviceFd as KvmDeviceFd;
31#[cfg(all(feature = "mshv", not(test)))]
32use mshv_bindings::{
33    mshv_device_attr, MSHV_DEV_VFIO_FILE, MSHV_DEV_VFIO_FILE_ADD, MSHV_DEV_VFIO_FILE_DEL,
34};
35#[cfg(all(feature = "mshv", not(test)))]
36use mshv_ioctls::DeviceFd as MshvDeviceFd;
37#[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
38use std::os::unix::io::FromRawFd;
39#[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
40use vmm_sys_util::errno::Error;
41
/// Hypervisor-specific device fd stored inside a `VfioDeviceFd`.
///
/// Each variant is compiled in only when the matching cargo feature is enabled
/// and the crate is not being built for tests.
#[derive(Debug)]
enum DeviceFdInner {
    /// A KVM device fd.
    #[cfg(all(feature = "kvm", not(test)))]
    Kvm(KvmDeviceFd),
    /// An MSHV device fd.
    #[cfg(all(feature = "mshv", not(test)))]
    Mshv(MshvDeviceFd),
}
49
#[derive(Debug)]
/// A wrapper for a device fd from either KVM or MSHV.
///
/// The wrapped fd lives in a private, feature-gated enum; the `new_from_*`
/// constructors and `to_*` accessors convert to and from the
/// hypervisor-specific fd types.
pub struct VfioDeviceFd(DeviceFdInner);
53
54impl VfioDeviceFd {
55    /// Create an VfioDeviceFd from a KVM DeviceFd
56    #[cfg(all(feature = "kvm", not(test)))]
57    pub fn new_from_kvm(fd: KvmDeviceFd) -> Self {
58        VfioDeviceFd(DeviceFdInner::Kvm(fd))
59    }
60    /// Extract the KVM DeviceFd from an VfioDeviceFd
61    #[cfg(all(feature = "kvm", not(test)))]
62    pub fn to_kvm(self) -> Result<KvmDeviceFd> {
63        match self {
64            VfioDeviceFd(DeviceFdInner::Kvm(fd)) => Ok(fd),
65            #[allow(unreachable_patterns)]
66            _ => Err(VfioError::VfioDeviceFdWrongType),
67        }
68    }
69    /// Create an VfioDeviceFd from an MSHV DeviceFd
70    #[cfg(all(feature = "mshv", not(test)))]
71    pub fn new_from_mshv(fd: MshvDeviceFd) -> Self {
72        VfioDeviceFd(DeviceFdInner::Mshv(fd))
73    }
74    /// Extract the MSHV DeviceFd from an VfioDeviceFd
75    #[cfg(all(feature = "mshv", not(test)))]
76    pub fn to_mshv(self) -> Result<MshvDeviceFd> {
77        match self {
78            VfioDeviceFd(DeviceFdInner::Mshv(fd)) => Ok(fd),
79            #[allow(unreachable_patterns)]
80            _ => Err(VfioError::VfioDeviceFdWrongType),
81        }
82    }
83    /// Try to duplicate an VfioDeviceFd
84    #[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
85    pub fn try_clone(&self) -> Result<Self> {
86        match &self.0 {
87            #[cfg(feature = "kvm")]
88            DeviceFdInner::Kvm(fd) => {
89                // SAFETY: FFI call to libc
90                let dup_fd = unsafe { libc::dup(fd.as_raw_fd()) };
91                if dup_fd == -1 {
92                    Err(VfioError::VfioDeviceDupFd)
93                } else {
94                    // SAFETY: dup_fd is a valid device fd for KVM
95                    let kvm_fd = unsafe { KvmDeviceFd::from_raw_fd(dup_fd) };
96                    Ok(VfioDeviceFd(DeviceFdInner::Kvm(kvm_fd)))
97                }
98            }
99            #[cfg(feature = "mshv")]
100            DeviceFdInner::Mshv(fd) => {
101                // SAFETY: FFI call to libc
102                let dup_fd = unsafe { libc::dup(fd.as_raw_fd()) };
103                if dup_fd == -1 {
104                    Err(VfioError::VfioDeviceDupFd)
105                } else {
106                    // SAFETY: dup_fd is a valid device fd for MSHV
107                    let mshv_fd = unsafe { MshvDeviceFd::from_raw_fd(dup_fd) };
108                    Ok(VfioDeviceFd(DeviceFdInner::Mshv(mshv_fd)))
109                }
110            }
111        }
112    }
113}
114
/// Shared, reference-counted handle to the hypervisor VFIO device fd.
pub type VfioContainerDeviceHandle = Arc<VfioDeviceFd>;
116
#[repr(C)]
#[derive(Debug, Default)]
// A VFIO region structure with an incomplete array for region
// capabilities information.
//
// When the VFIO_DEVICE_GET_REGION_INFO ioctl returns with the
// VFIO_REGION_INFO_FLAG_CAPS flag set, it also provides the size of the region
// capabilities information. This is a kernel hint for us to fetch this
// information by calling the same ioctl again, but with the argument size set
// to the region info plus the capabilities information array length. The
// kernel will then fill our vfio_region_info_with_cap structure with both the
// region info and its capabilities.
pub struct vfio_region_info_with_cap {
    // Fixed-size region info header.
    pub region_info: vfio_region_info,
    // Zero-sized marker for the variable-length capability bytes that follow
    // the header in memory.
    cap_info: __IncompleteArrayField<u8>,
}
133
134impl vfio_region_info_with_cap {
135    fn from_region_info(region_info: &vfio_region_info) -> Vec<Self> {
136        let region_info_size: u32 = mem::size_of::<vfio_region_info>() as u32;
137        let cap_len: usize = (region_info.argsz - region_info_size) as usize;
138
139        let mut region_with_cap = vec_with_array_field::<Self, u8>(cap_len);
140        region_with_cap[0].region_info.argsz = region_info.argsz;
141        region_with_cap[0].region_info.flags = 0;
142        region_with_cap[0].region_info.index = region_info.index;
143        region_with_cap[0].region_info.cap_offset = 0;
144        region_with_cap[0].region_info.size = 0;
145        region_with_cap[0].region_info.offset = 0;
146
147        region_with_cap
148    }
149}
/// Trait to define common operations exposed to user-space drivers for
/// VFIO device wrappers that are either backed by a legacy VfioContainer or
/// a VFIO cdev device using iommufd.
///
/// Every method has a default body that calls `unimplemented!()`, so an
/// implementor must override each method it expects callers to use.
pub trait VfioOps: Any + Send + Sync {
    /// Map a region of user space memory (e.g. guest memory) into an IO
    /// address space managed by IOMMU hardware to enable DMA for
    /// associated VFIO devices
    ///
    /// # Parameters
    /// * iova: IO virtual address to map the memory.
    /// * size: size of the memory region.
    /// * user_addr: user space address (e.g. host virtual address) for
    ///   the guest memory region to map.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn vfio_dma_map(&self, _iova: u64, _size: u64, _user_addr: u64) -> Result<()> {
        unimplemented!()
    }

    /// Unmap a region of user space memory (e.g. guest memory) from an IO
    /// address space managed by IOMMU hardware to disable DMA for
    /// associated VFIO devices
    ///
    /// # Parameters
    /// * iova: IO virtual address to unmap the memory.
    /// * size: size of the memory region.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn vfio_dma_unmap(&self, _iova: u64, _size: u64) -> Result<()> {
        unimplemented!()
    }

    /// Downcast to the underlying vfio wrapper type
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn as_any(&self) -> &dyn Any {
        unimplemented!()
    }
}
183
/// State shared by the VFIO wrappers: the optional hypervisor VFIO device fd.
struct VfioCommon {
    // Hypervisor VFIO device handle; `None` means there is no hypervisor
    // device to notify. Only read when a hypervisor feature is enabled outside
    // of tests, hence the `dead_code` allowance.
    #[allow(dead_code)]
    device_fd: Option<VfioContainerDeviceHandle>,
}
188
189impl VfioCommon {
190    #[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
191    fn device_set_fd(&self, dev_fd: RawFd, add: bool) -> Result<()> {
192        let dev_fd_ptr = &dev_fd as *const i32;
193
194        if let Some(device_fd) = self.device_fd.as_ref() {
195            match &device_fd.0 {
196                #[cfg(feature = "kvm")]
197                DeviceFdInner::Kvm(fd) => {
198                    let flag = if add {
199                        KVM_DEV_VFIO_FILE_ADD
200                    } else {
201                        KVM_DEV_VFIO_FILE_DEL
202                    };
203                    let dev_attr = kvm_device_attr {
204                        flags: 0,
205                        group: KVM_DEV_VFIO_FILE,
206                        attr: u64::from(flag),
207                        addr: dev_fd_ptr as u64,
208                    };
209                    fd.set_device_attr(&dev_attr)
210                        .map_err(|e| VfioError::SetDeviceAttr(Error::new(e.errno())))
211                }
212                #[cfg(feature = "mshv")]
213                DeviceFdInner::Mshv(fd) => {
214                    let flag = if add {
215                        MSHV_DEV_VFIO_FILE_ADD
216                    } else {
217                        MSHV_DEV_VFIO_FILE_DEL
218                    };
219                    let dev_attr = mshv_device_attr {
220                        flags: 0,
221                        group: MSHV_DEV_VFIO_FILE,
222                        attr: u64::from(flag),
223                        addr: dev_fd_ptr as u64,
224                    };
225                    fd.set_device_attr(&dev_attr)
226                        .map_err(|e| VfioError::SetDeviceAttr(Error::new(e.errno())))
227                }
228            }
229        } else {
230            Ok(())
231        }
232    }
233}
234
/// A safe wrapper over a VFIO container object.
///
/// A VFIO container represents an IOMMU domain, or a set of IO virtual address translation tables.
/// On its own, the container provides little functionality, with all but a couple version and
/// extension query interfaces locked away. The user needs to add a group into the container for
/// the next level of functionality. After some groups are associated with a container, the user
/// can query and set the IOMMU backend, and then build IOVA mapping to access memory.
///
/// Multiple VFIO groups may be associated with the same VFIO container to share the underlying
/// address translation mapping tables.
pub struct VfioContainer {
    // The opened `/dev/vfio/vfio` file.
    pub(crate) container: File,
    // Groups currently bound to this container, keyed by group id.
    pub(crate) groups: Mutex<HashMap<u32, Arc<VfioGroup>>>,
    // Holds the optional hypervisor VFIO device fd.
    #[allow(dead_code)]
    common: VfioCommon,
}
251
252impl VfioContainer {
253    /// Create a container wrapper object.
254    ///
255    /// # Arguments
256    /// * `device_fd`: An optional file handle of the hypervisor VFIO device.
257    pub fn new(device_fd: Option<VfioContainerDeviceHandle>) -> Result<Self> {
258        let container = OpenOptions::new()
259            .read(true)
260            .write(true)
261            .open("/dev/vfio/vfio")
262            .map_err(VfioError::OpenContainer)?;
263
264        let container = VfioContainer {
265            container,
266            common: VfioCommon { device_fd },
267            groups: Mutex::new(HashMap::new()),
268        };
269        container.check_api_version()?;
270        container.check_extension(VFIO_TYPE1v2_IOMMU)?;
271
272        Ok(container)
273    }
274
275    fn check_api_version(&self) -> Result<()> {
276        let version = vfio_syscall::check_api_version(self);
277        if version as u32 != VFIO_API_VERSION {
278            return Err(VfioError::VfioApiVersion);
279        }
280        Ok(())
281    }
282
283    fn check_extension(&self, val: u32) -> Result<()> {
284        if val != VFIO_TYPE1_IOMMU && val != VFIO_TYPE1v2_IOMMU {
285            return Err(VfioError::VfioInvalidType);
286        }
287
288        let ret = vfio_syscall::check_extension(self, val)?;
289        if ret != 1 {
290            return Err(VfioError::VfioExtension);
291        }
292
293        Ok(())
294    }
295
296    fn set_iommu(&self, val: u32) -> Result<()> {
297        if val != VFIO_TYPE1_IOMMU && val != VFIO_TYPE1v2_IOMMU {
298            return Err(VfioError::VfioInvalidType);
299        }
300
301        vfio_syscall::set_iommu(self, val)
302    }
303
304    fn get_group(&self, group_id: u32) -> Result<Arc<VfioGroup>> {
305        // Safe because there's no legal way to break the lock.
306        let mut hash = self.groups.lock().unwrap();
307        if let Some(entry) = hash.get(&group_id) {
308            return Ok(entry.clone());
309        }
310
311        let group = Arc::new(VfioGroup::new(group_id)?);
312
313        // Bind the new group object to the container.
314        vfio_syscall::set_group_container(&group, self)?;
315
316        // Initialize the IOMMU backend driver after binding the first group object.
317        if hash.is_empty() {
318            if let Err(e) = self.set_iommu(VFIO_TYPE1v2_IOMMU) {
319                let _ = vfio_syscall::unset_group_container(&group, self);
320                return Err(e);
321            }
322        }
323
324        // Add the new group object to the hypervisor driver.
325        #[cfg(any(feature = "kvm", feature = "mshv"))]
326        if let Err(e) = self.device_add_group(&group) {
327            let _ = vfio_syscall::unset_group_container(&group, self);
328            return Err(e);
329        }
330
331        hash.insert(group_id, group.clone());
332
333        Ok(group)
334    }
335
336    fn put_group(&self, group: Arc<VfioGroup>) {
337        // Safe because there's no legal way to break the lock.
338        let mut hash = self.groups.lock().unwrap();
339
340        // Clean up the group when the last user releases reference to the group, three reference
341        // count for:
342        // - one reference cloned in VfioDevice.drop() and passed into here
343        // - one reference held by the groups hashmap
344        if Arc::strong_count(&group) == 2 {
345            #[cfg(any(feature = "kvm", feature = "mshv"))]
346            match self.device_del_group(&group) {
347                Ok(_) => {}
348                Err(e) => {
349                    error!("Could not delete VFIO group: {e:?}");
350                    return;
351                }
352            }
353            if vfio_syscall::unset_group_container(&group, self).is_err() {
354                error!("Could not unbind VFIO group: {:?}", group.id());
355                return;
356            }
357            hash.remove(&group.id());
358        }
359    }
360
361    /// Map a region of user space memory (e.g. guest memory) into an IO
362    /// address space managed by IOMMU hardware to enable DMA for
363    /// associated VFIO devices
364    ///
365    /// # Parameters
366    /// * iova: IO virtual address to mapping the memory.
367    /// * size: size of the memory region.
368    /// * user_addr: host virtual address for the guest memory region to map.
369    pub fn vfio_dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<()> {
370        let dma_map = vfio_iommu_type1_dma_map {
371            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
372            flags: VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
373            vaddr: user_addr,
374            iova,
375            size,
376        };
377
378        vfio_syscall::map_dma(self, &dma_map)
379    }
380
381    /// Unmap a region of user space memory (e.g. guest memory) from an IO
382    /// address space managed by IOMMU hardware to disable DMA for
383    /// associated VFIO devices
384    ///
385    /// # Parameters
386    /// * iova: IO virtual address to unmap the memory.
387    /// * size: size of the memory region.
388    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
389        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
390            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
391            flags: 0,
392            iova,
393            size,
394            ..Default::default()
395        };
396
397        vfio_syscall::unmap_dma(self, &mut dma_unmap)?;
398        if dma_unmap.size != size {
399            return Err(VfioError::InvalidDmaUnmapSize);
400        }
401
402        Ok(())
403    }
404
405    /// Add all guest memory regions into the vfio container's iommu table.
406    ///
407    /// # Parameters
408    /// * mem: pinned guest memory which could be accessed by devices binding to the container.
409    pub fn vfio_map_guest_memory<M: GuestMemory>(&self, mem: &M) -> Result<()> {
410        mem.iter().try_for_each(|region| {
411            let host_addr = region
412                .get_host_address(MemoryRegionAddress(0))
413                .map_err(|_| VfioError::GetHostAddress)?;
414            self.vfio_dma_map(
415                region.start_addr().raw_value(),
416                region.len(),
417                host_addr as u64,
418            )
419        })
420    }
421
422    /// Remove all guest memory regions from the vfio container's iommu table.
423    ///
424    /// The vfio kernel driver and device hardware couldn't access this guest memory after
425    /// returning from the function.
426    ///
427    /// # Parameters
428    /// * mem: pinned guest memory which could be accessed by devices binding to the container.
429    pub fn vfio_unmap_guest_memory<M: GuestMemory>(&self, mem: &M) -> Result<()> {
430        mem.iter().try_for_each(|region| {
431            self.vfio_dma_unmap(region.start_addr().raw_value(), region.len())
432        })
433    }
434
    /// Register a VFIO group with the hypervisor VFIO device.
    ///
    /// Forwards the group's raw fd to `VfioCommon::device_set_fd` with
    /// `add = true`. When no hypervisor device fd was configured, that call is
    /// a no-op returning `Ok(())`.
    ///
    /// # Parameters
    /// * group: target VFIO group
    #[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
    fn device_add_group(&self, group: &VfioGroup) -> Result<()> {
        self.common.device_set_fd(group.as_raw_fd(), true)
    }
445
    /// Unregister a VFIO group from the hypervisor VFIO device.
    ///
    /// Forwards the group's raw fd to `VfioCommon::device_set_fd` with
    /// `add = false`. When no hypervisor device fd was configured, that call is
    /// a no-op returning `Ok(())`.
    ///
    /// # Parameters
    /// * group: target VFIO group
    #[cfg(all(any(feature = "kvm", feature = "mshv"), not(test)))]
    fn device_del_group(&self, group: &VfioGroup) -> Result<()> {
        self.common.device_set_fd(group.as_raw_fd(), false)
    }
456
    // Test-build stand-in for `device_add_group`: always succeeds and never
    // touches a hypervisor device.
    #[cfg(test)]
    fn device_add_group(&self, _group: &VfioGroup) -> Result<()> {
        Ok(())
    }
461
    // Test-build stand-in for `device_del_group`: always succeeds and never
    // touches a hypervisor device.
    #[cfg(test)]
    fn device_del_group(&self, _group: &VfioGroup) -> Result<()> {
        Ok(())
    }
466}
467
impl AsRawFd for VfioContainer {
    /// Raw fd of the underlying container file. Ownership stays with the
    /// `VfioContainer`.
    fn as_raw_fd(&self) -> RawFd {
        self.container.as_raw_fd()
    }
}
473
474impl VfioOps for VfioContainer {
475    fn vfio_dma_map(&self, iova: u64, size: u64, user_addr: u64) -> Result<()> {
476        self.vfio_dma_map(iova, size, user_addr)
477    }
478
479    fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
480        self.vfio_dma_unmap(iova, size)
481    }
482
483    fn as_any(&self) -> &dyn Any {
484        self
485    }
486}
487
/// A safe wrapper over a VFIO group object.
///
/// The Linux VFIO framework supports multiple devices per group, and multiple groups per
/// container. But the current implementation assumes there's only one device per group to
/// simplify the implementation. With such an assumption, the `VfioGroup` becomes an internal
/// implementation detail.
pub struct VfioGroup {
    // Numeric group id; also the file name under `/dev/vfio`.
    pub(crate) id: u32,
    // The opened group file.
    pub(crate) group: File,
}
498
499impl VfioGroup {
500    #[cfg(not(test))]
501    fn open_group_file(id: u32) -> Result<File> {
502        let group_path = Path::new("/dev/vfio").join(id.to_string());
503        OpenOptions::new()
504            .read(true)
505            .write(true)
506            .open(group_path)
507            .map_err(|e| VfioError::OpenGroup(e, id.to_string()))
508    }
509
510    /// Create a new VfioGroup object.
511    ///
512    /// # Parameters
513    /// * `id`: ID(index) of the VFIO group file.
514    fn new(id: u32) -> Result<Self> {
515        let group = Self::open_group_file(id)?;
516        let mut group_status = vfio_group_status {
517            argsz: mem::size_of::<vfio_group_status>() as u32,
518            flags: 0,
519        };
520        vfio_syscall::get_group_status(&group, &mut group_status)?;
521        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
522            return Err(VfioError::GroupViable);
523        }
524
525        Ok(VfioGroup { id, group })
526    }
527
    /// Numeric id of this group.
    fn id(&self) -> u32 {
        self.id
    }
531
532    fn get_device(&self, name: &Path) -> Result<VfioDeviceInfo> {
533        let uuid_osstr = name.file_name().ok_or(VfioError::InvalidPath)?;
534        let uuid_str = uuid_osstr.to_str().ok_or(VfioError::InvalidPath)?;
535        let path: CString = CString::new(uuid_str.as_bytes()).expect("CString::new() failed");
536        let device = vfio_syscall::get_group_device_fd(self, &path)?;
537        let dev_info = VfioDeviceInfo::get_device_info(&device)?;
538
539        Ok(VfioDeviceInfo::new(device, &dev_info))
540    }
541}
542
impl AsRawFd for VfioGroup {
    /// Raw fd of the underlying group file. Ownership stays with the
    /// `VfioGroup`.
    fn as_raw_fd(&self) -> RawFd {
        self.group.as_raw_fd()
    }
}
548
/// One mmap'able area of a region that advertises a sparse mmap capability.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VfioRegionSparseMmapArea {
    /// Offset of mmap'able area within region
    pub offset: u64,
    /// Size of mmap'able area
    pub size: u64,
}
557
/// Sparse mmap capability of a region: the list of its mmap'able areas.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VfioRegionInfoCapSparseMmap {
    /// The mmap'able areas, in the order they were read from the capability.
    pub areas: Vec<VfioRegionSparseMmapArea>,
}
564
/// Type capability of a region: identifies a specific device region by type
/// and subtype.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VfioRegionInfoCapType {
    /// Device type
    pub type_: u32,
    /// Device subtype
    pub subtype: u32,
}
573
/// NVLink2 SSA TGT capability of a region.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VfioRegionInfoCapNvlink2Ssatgt {
    /// TGT value
    pub tgt: u64,
}
580
/// NVLink2 link speed capability of a region.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VfioRegionInfoCapNvlink2Lnkspd {
    /// Link speed value
    pub link_speed: u32,
}
587
/// Capabilities that can be attached to a region.
///
/// One value is recorded per recognised entry of the region's capability
/// chain.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum VfioRegionInfoCap {
    /// Sparse memory mapping type
    SparseMmap(VfioRegionInfoCapSparseMmap),
    /// Capability holding type and subtype
    Type(VfioRegionInfoCapType),
    /// Indicate if the region is mmap'able with the presence of MSI-X region
    MsixMappable,
    /// NVLink SSA TGT
    Nvlink2Ssatgt(VfioRegionInfoCapNvlink2Ssatgt),
    /// NVLink Link Speed
    Nvlink2Lnkspd(VfioRegionInfoCapNvlink2Lnkspd),
}
602
/// Information about VFIO MMIO region.
#[derive(Clone, Debug)]
pub struct VfioRegion {
    // `flags` as reported by the region info query.
    pub(crate) flags: u32,
    // `size` as reported by the region info query.
    pub(crate) size: u64,
    // `offset` as reported by the region info query.
    pub(crate) offset: u64,
    // Capabilities parsed from the region's capability chain; empty when the
    // region reports none.
    pub(crate) caps: Vec<VfioRegionInfoCap>,
}
611
/// Information about VFIO interrupts.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VfioIrq {
    /// Flags for irq.
    pub flags: u32,
    /// Index of the irq, as used in the irq info query.
    pub index: u32,
    /// Number of interrupts reported for this index.
    pub count: u32,
}
622
/// An opened VFIO device file together with the counts and flags reported by
/// its device info query.
pub(crate) struct VfioDeviceInfo {
    // The opened device file.
    device: File,
    // `flags` copied from `vfio_device_info`.
    flags: u32,
    // `num_regions` copied from `vfio_device_info`.
    num_regions: u32,
    // `num_irqs` copied from `vfio_device_info`.
    num_irqs: u32,
}
629
630impl VfioDeviceInfo {
631    #[inline]
632    /// Get device type from device_info flags.
633    ///
634    /// # Parameters
635    /// * `flags`: flags field in device_info structure.
636    fn get_device_type(flags: &u32) -> u32 {
637        // There may be more types of device here later according to vfio_bindings.
638        let device_type: u32 = VFIO_DEVICE_FLAGS_PCI
639            | VFIO_DEVICE_FLAGS_PLATFORM
640            | VFIO_DEVICE_FLAGS_AMBA
641            | VFIO_DEVICE_FLAGS_CCW
642            | VFIO_DEVICE_FLAGS_AP;
643
644        flags & device_type
645    }
646
647    fn get_device_info(device: &File) -> Result<vfio_device_info> {
648        let mut dev_info = vfio_device_info {
649            argsz: mem::size_of::<vfio_device_info>() as u32,
650            flags: 0,
651            num_regions: 0,
652            num_irqs: 0,
653            cap_offset: 0,
654            pad: 0,
655        };
656        vfio_syscall::get_device_info(device, &mut dev_info)?;
657        match VfioDeviceInfo::get_device_type(&dev_info.flags) {
658            VFIO_DEVICE_FLAGS_PLATFORM => {}
659            VFIO_DEVICE_FLAGS_PCI => {
660                if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
661                    || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
662                {
663                    return Err(VfioError::VfioDeviceGetInfoPCI);
664                }
665            }
666            _ => {
667                return Err(VfioError::VfioDeviceGetInfoOther);
668            }
669        }
670
671        Ok(dev_info)
672    }
673
674    fn new(device: File, dev_info: &vfio_device_info) -> Self {
675        VfioDeviceInfo {
676            device,
677            flags: dev_info.flags,
678            num_regions: dev_info.num_regions,
679            num_irqs: dev_info.num_irqs,
680        }
681    }
682
683    fn get_irqs(&self) -> Result<HashMap<u32, VfioIrq>> {
684        let mut irqs: HashMap<u32, VfioIrq> = HashMap::new();
685
686        for index in 0..self.num_irqs {
687            let mut irq_info = vfio_irq_info {
688                argsz: mem::size_of::<vfio_irq_info>() as u32,
689                flags: 0,
690                index,
691                count: 0,
692            };
693
694            if vfio_syscall::get_device_irq_info(self, &mut irq_info).is_err() {
695                warn!("Could not get VFIO IRQ info for index {index:}");
696                continue;
697            }
698
699            let irq = VfioIrq {
700                flags: irq_info.flags,
701                index,
702                count: irq_info.count,
703            };
704
705            debug!("IRQ #{index}");
706            debug!("\tflag 0x{:x}", irq.flags);
707            debug!("\tindex {}", irq.index);
708            debug!("\tcount {}", irq.count);
709            irqs.insert(index, irq);
710        }
711
712        Ok(irqs)
713    }
714
715    fn get_region_map(
716        &self,
717        region: &mut VfioRegion,
718        region_info: &vfio_region_info,
719    ) -> Result<()> {
720        let region_info_size: u32 = mem::size_of::<vfio_region_info>() as u32;
721
722        if region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0
723            || region_info.argsz <= region_info_size
724        {
725            // There is not capabilities information for that region, we can just return.
726            return Ok(());
727        }
728
729        // There is a capability information for that region, we have to call
730        // VFIO_DEVICE_GET_REGION_INFO with a vfio_region_with_cap structure and the hinted size.
731        let mut region_with_cap = vfio_region_info_with_cap::from_region_info(region_info);
732        vfio_syscall::get_device_region_info_cap(self, &mut region_with_cap)?;
733
734        // region_with_cap[0] may contain different types of structure depending on the capability
735        // type, but all of them begin with vfio_info_cap_header in order to identify the capability
736        // type, version and if there's another capability after this one.
737        // It is safe to convert region_with_cap[0] with an offset of cap_offset into
738        // vfio_info_cap_header pointer and access its elements, as long as cap_offset is greater
739        // than region_info_size.
740        //
741        // Safety: following code is safe because we trust data returned by the kernel.
742        if region_with_cap[0].region_info.cap_offset >= region_info_size {
743            let mut next_cap_offset = region_with_cap[0].region_info.cap_offset;
744            let info_ptr = &region_with_cap[0] as *const vfio_region_info_with_cap as *const u8;
745
746            while next_cap_offset >= region_info_size {
747                // SAFETY: data structure returned by kernel is trusted.
748                let cap_header = unsafe {
749                    *(info_ptr.offset(next_cap_offset as isize) as *const vfio_info_cap_header)
750                };
751
752                match u32::from(cap_header.id) {
753                    VFIO_REGION_INFO_CAP_SPARSE_MMAP => {
754                        // SAFETY: data structure returned by kernel is trusted.
755                        let sparse_mmap = unsafe {
756                            info_ptr.offset(next_cap_offset as isize)
757                                as *const vfio_region_info_cap_sparse_mmap
758                        };
759                        // SAFETY: data structure returned by kernel is trusted.
760                        let nr_areas = unsafe { (*sparse_mmap).nr_areas };
761                        // SAFETY: data structure returned by kernel is trusted.
762                        let areas = unsafe { (*sparse_mmap).areas.as_slice(nr_areas as usize) };
763
764                        let cap = VfioRegionInfoCapSparseMmap {
765                            areas: areas
766                                .iter()
767                                .map(|a| VfioRegionSparseMmapArea {
768                                    offset: a.offset,
769                                    size: a.size,
770                                })
771                                .collect(),
772                        };
773                        region.caps.push(VfioRegionInfoCap::SparseMmap(cap));
774                    }
775                    VFIO_REGION_INFO_CAP_TYPE => {
776                        // SAFETY: data structure returned by kernel is trusted.
777                        let type_ = unsafe {
778                            *(info_ptr.offset(next_cap_offset as isize)
779                                as *const vfio_region_info_cap_type)
780                        };
781                        let cap = VfioRegionInfoCapType {
782                            type_: type_.type_,
783                            subtype: type_.subtype,
784                        };
785                        region.caps.push(VfioRegionInfoCap::Type(cap));
786                    }
787                    VFIO_REGION_INFO_CAP_MSIX_MAPPABLE => {
788                        region.caps.push(VfioRegionInfoCap::MsixMappable);
789                    }
790                    VFIO_REGION_INFO_CAP_NVLINK2_SSATGT => {
791                        // SAFETY: data structure returned by kernel is trusted.
792                        let nvlink2_ssatgt = unsafe {
793                            *(info_ptr.offset(next_cap_offset as isize)
794                                as *const vfio_region_info_cap_nvlink2_ssatgt)
795                        };
796                        let cap = VfioRegionInfoCapNvlink2Ssatgt {
797                            tgt: nvlink2_ssatgt.tgt,
798                        };
799                        region.caps.push(VfioRegionInfoCap::Nvlink2Ssatgt(cap));
800                    }
801                    VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD => {
802                        // SAFETY: data structure returned by kernel is trusted.
803                        let nvlink2_lnkspd = unsafe {
804                            *(info_ptr.offset(next_cap_offset as isize)
805                                as *const vfio_region_info_cap_nvlink2_lnkspd)
806                        };
807                        let cap = VfioRegionInfoCapNvlink2Lnkspd {
808                            link_speed: nvlink2_lnkspd.link_speed,
809                        };
810                        region.caps.push(VfioRegionInfoCap::Nvlink2Lnkspd(cap));
811                    }
812                    _ => {}
813                }
814
815                next_cap_offset = cap_header.next;
816            }
817        }
818
819        Ok(())
820    }
821
822    fn get_regions(&self) -> Result<Vec<VfioRegion>> {
823        let mut regions: Vec<VfioRegion> = Vec::new();
824
825        for i in VFIO_PCI_BAR0_REGION_INDEX..self.num_regions {
826            let argsz: u32 = mem::size_of::<vfio_region_info>() as u32;
827            let mut reg_info = vfio_region_info {
828                argsz,
829                flags: 0,
830                index: i,
831                cap_offset: 0,
832                size: 0,
833                offset: 0,
834            };
835
836            if let Err(e) = vfio_syscall::get_device_region_info(self, &mut reg_info) {
837                match e {
838                    // Non-VGA devices do not have the VGA region,
839                    // the kernel indicates this by returning -EINVAL,
840                    // and it's not an error.
841                    VfioError::VfioDeviceGetRegionInfo(e)
842                        if e.errno() == libc::EINVAL && i == VFIO_PCI_VGA_REGION_INDEX =>
843                    {
844                        continue;
845                    }
846                    _ => {
847                        error!("Could not get region #{i} info {e}");
848                        continue;
849                    }
850                }
851            }
852
853            let mut region = VfioRegion {
854                flags: reg_info.flags,
855                size: reg_info.size,
856                offset: reg_info.offset,
857                caps: Vec::new(),
858            };
859            if let Err(e) = self.get_region_map(&mut region, &reg_info) {
860                error!("Could not get region #{i} map {e}");
861                continue;
862            }
863
864            debug!("Region #{i}");
865            debug!("\tflag 0x{:x}", region.flags);
866            debug!("\tsize 0x{:x}", region.size);
867            debug!("\toffset 0x{:x}", region.offset);
868            regions.push(region);
869        }
870
871        Ok(regions)
872    }
873}
874
875impl AsRawFd for VfioDeviceInfo {
876    fn as_raw_fd(&self) -> RawFd {
877        self.device.as_raw_fd()
878    }
879}
880
881/// A safe wrapper over a Vfio device to access underlying hardware device.
882///
883/// The VFIO device API includes ioctls for describing the device, the I/O regions and their
884/// read/write/mmap offsets on the device descriptor, as well as mechanisms for describing and
885/// registering interrupt notifications.
886pub struct VfioDevice {
887    pub(crate) device: ManuallyDrop<File>,
888    pub(crate) flags: u32,
889    pub(crate) regions: Vec<VfioRegion>,
890    pub(crate) irqs: HashMap<u32, VfioIrq>,
891    pub(crate) sysfspath: PathBuf,
892    pub(crate) vfio_ops: Arc<dyn VfioOps>,
893}
894
895impl VfioDevice {
896    #[cfg(not(test))]
897    fn get_group_id_from_path(sysfspath: &Path) -> Result<u32> {
898        let uuid_path: PathBuf = [sysfspath, Path::new("iommu_group")].iter().collect();
899        let group_path = uuid_path.read_link().map_err(|_| VfioError::InvalidPath)?;
900        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
901        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
902
903        group_str.parse::<u32>().map_err(|_| VfioError::InvalidPath)
904    }
905
906    /// Create a new vfio device, then guest read/write on this device could be transferred into kernel vfio.
907    ///
908    /// # Parameters
909    /// * `sysfspath`: specify the vfio device path in sys file system.
910    /// * `vfio_ops`: the vfio device wrapper object that the new VFIO device object will bind to.
911    pub fn new(sysfspath: &Path, vfio_ops: Arc<dyn VfioOps>) -> Result<Self> {
912        let device_info =
913            if let Some(vfio_container) = vfio_ops.as_any().downcast_ref::<VfioContainer>() {
914                let group_id = Self::get_group_id_from_path(sysfspath)?;
915                let group = vfio_container.get_group(group_id)?;
916                group.get_device(sysfspath)?
917            } else {
918                return Err(VfioError::DowncastVfioOps);
919            };
920
921        let regions = device_info.get_regions()?;
922        let irqs = device_info.get_irqs()?;
923
924        Ok(VfioDevice {
925            device: ManuallyDrop::new(device_info.device),
926            flags: device_info.flags,
927            regions,
928            irqs,
929            sysfspath: sysfspath.to_path_buf(),
930            vfio_ops,
931        })
932    }
933
934    /// VFIO device reset only if the device supports being reset.
935    pub fn reset(&self) {
936        if self.flags & VFIO_DEVICE_FLAGS_RESET != 0 {
937            vfio_syscall::reset(self);
938        }
939    }
940
941    /// Get information about VFIO IRQs.
942    ///
943    /// # Arguments
944    /// * `irq_index` - The type (INTX, MSI or MSI-X) of interrupts to enable.
945    pub fn get_irq_info(&self, irq_index: u32) -> Option<&VfioIrq> {
946        self.irqs.get(&irq_index)
947    }
948
949    /// Trigger a VFIO device IRQ from userspace.
950    ///
951    /// Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used with ACTION_TRIGGER
952    /// to perform kernel level interrupt loopback testing from userspace (ie. simulate hardware
953    /// triggering).
954    ///
955    /// # Arguments
956    /// * `irq_index` - The type (INTX, MSI or MSI-X) of interrupts to enable.
957    /// * `vector` - The sub-index into the interrupt group of `irq_index`.
958    pub fn trigger_irq(&self, irq_index: u32, vector: u32) -> Result<()> {
959        let irq = self
960            .irqs
961            .get(&irq_index)
962            .ok_or(VfioError::VfioDeviceTriggerIrq)?;
963        if irq.count <= vector {
964            return Err(VfioError::VfioDeviceTriggerIrq);
965        }
966
967        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
968        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
969        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
970        irq_set[0].index = irq_index;
971        irq_set[0].start = vector;
972        irq_set[0].count = 1;
973
974        vfio_syscall::set_device_irqs(self, irq_set.as_slice())
975            .map_err(|_| VfioError::VfioDeviceTriggerIrq)
976    }
977
978    /// Enables a VFIO device IRQs.
979    /// This maps a vector of EventFds to all VFIO managed interrupts. In other words, this
980    /// tells VFIO which EventFd to write into whenever one of the device interrupt vector
981    /// is triggered.
982    ///
983    /// # Arguments
984    /// * `irq_index` - The type (INTX, MSI or MSI-X) of interrupts to enable.
985    /// * `event_fds` - The EventFds vector that matches all the supported VFIO interrupts.
986    pub fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<()> {
987        let irq = self
988            .irqs
989            .get(&irq_index)
990            .ok_or(VfioError::VfioDeviceEnableIrq)?;
991        if irq.count == 0 || (irq.count as usize) < event_fds.len() {
992            return Err(VfioError::VfioDeviceEnableIrq);
993        }
994
995        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(event_fds.len());
996        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32
997            + (event_fds.len() * mem::size_of::<u32>()) as u32;
998        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
999        irq_set[0].index = irq_index;
1000        irq_set[0].start = 0;
1001        irq_set[0].count = event_fds.len() as u32;
1002
1003        {
1004            // irq_set.data could be none, bool or fd according to flags, so irq_set.data
1005            // is u8 default, here irq_set.data is a vector of fds as u32, so 4 default u8
1006            // are combined together as u32 for each fd.
1007            // SAFETY: It is safe as enough space is reserved through
1008            // vec_with_array_field(u32)<event_fds.len()>.
1009            let fds = unsafe {
1010                irq_set[0]
1011                    .data
1012                    .as_mut_slice(event_fds.len() * mem::size_of::<u32>())
1013            };
1014            for (index, event_fd) in event_fds.iter().enumerate() {
1015                let fds_offset = index * mem::size_of::<u32>();
1016                let fd = &mut fds[fds_offset..fds_offset + mem::size_of::<u32>()];
1017                NativeEndian::write_u32(fd, event_fd.as_raw_fd() as u32);
1018            }
1019        }
1020
1021        vfio_syscall::set_device_irqs(self, irq_set.as_slice())
1022            .map_err(|_| VfioError::VfioDeviceEnableIrq)
1023    }
1024
1025    /// Sets a VFIO irq's resample fd.
1026    /// This allows to set the signaling for an ACTION_UNMASK action. Once the resample fd
1027    /// is set, vfio can auto-unmask the INTX interrupt when the resamplefd is triggered.
1028    ///
1029    /// # Arguments
1030    /// * `irq_index` - INTX (the only type support to set resample fd)
1031    /// * `event_rfds` - The resample EventFds will be set to vfio.
1032    pub fn set_irq_resample_fd(&self, irq_index: u32, event_rfds: Vec<&EventFd>) -> Result<()> {
1033        let irq = self
1034            .irqs
1035            .get(&irq_index)
1036            .ok_or(VfioError::VfioDeviceSetIrqResampleFd)?;
1037        // Currently the VFIO driver only support MASK/UNMASK INTX, so count is hard-coded to 1.
1038        if irq.count != 1
1039            || (irq.count as usize) < event_rfds.len()
1040            || irq.index != VFIO_PCI_INTX_IRQ_INDEX
1041        {
1042            return Err(VfioError::VfioDeviceSetIrqResampleFd);
1043        }
1044
1045        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(event_rfds.len());
1046        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32
1047            + (event_rfds.len() * mem::size_of::<u32>()) as u32;
1048        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1049        irq_set[0].index = irq_index;
1050        irq_set[0].start = 0;
1051        irq_set[0].count = event_rfds.len() as u32;
1052
1053        {
1054            // irq_set.data could be none, bool or fd according to flags, so irq_set.data
1055            // is u8 default, here irq_set.data is a vector of fds as u32, so 4 default u8
1056            // are combined together as u32 for each fd.
1057            // SAFETY: It is safe as enough space is reserved through
1058            // vec_with_array_field(u32)<event_fds.len()>.
1059            let fds = unsafe {
1060                irq_set[0]
1061                    .data
1062                    .as_mut_slice(event_rfds.len() * mem::size_of::<u32>())
1063            };
1064            for (index, event_fd) in event_rfds.iter().enumerate() {
1065                let fds_offset = index * mem::size_of::<u32>();
1066                let fd = &mut fds[fds_offset..fds_offset + mem::size_of::<u32>()];
1067                NativeEndian::write_u32(fd, event_fd.as_raw_fd() as u32);
1068            }
1069        }
1070
1071        vfio_syscall::set_device_irqs(self, irq_set.as_slice())
1072            .map_err(|_| VfioError::VfioDeviceSetIrqResampleFd)
1073    }
1074
1075    /// Disables a VFIO device IRQs
1076    ///
1077    /// # Arguments
1078    /// * `irq_index` - The type (INTX, MSI or MSI-X) of interrupts to disable.
1079    pub fn disable_irq(&self, irq_index: u32) -> Result<()> {
1080        let irq = self
1081            .irqs
1082            .get(&irq_index)
1083            .ok_or(VfioError::VfioDeviceDisableIrq)?;
1084        // Currently the VFIO driver only support MASK/UNMASK INTX, so count is hard-coded to 1.
1085        if irq.count == 0 {
1086            return Err(VfioError::VfioDeviceDisableIrq);
1087        }
1088
1089        // Individual subindex interrupts can be disabled using the -1 value for DATA_EVENTFD or
1090        // the index can be disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
1091        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1092        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1093        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
1094        irq_set[0].index = irq_index;
1095        irq_set[0].start = 0;
1096        irq_set[0].count = 0;
1097
1098        vfio_syscall::set_device_irqs(self, irq_set.as_slice())
1099            .map_err(|_| VfioError::VfioDeviceDisableIrq)
1100    }
1101
1102    /// Unmask IRQ
1103    ///
1104    /// # Arguments
1105    /// * `irq_index` - The type (INTX, MSI or MSI-X) of interrupts to unmask.
1106    pub fn unmask_irq(&self, irq_index: u32) -> Result<()> {
1107        let irq = self
1108            .irqs
1109            .get(&irq_index)
1110            .ok_or(VfioError::VfioDeviceUnmaskIrq)?;
1111        // Currently the VFIO driver only support MASK/UNMASK INTX, so count is hard-coded to 1.
1112        if irq.count == 0 || irq.count != 1 || irq.index != VFIO_PCI_INTX_IRQ_INDEX {
1113            return Err(VfioError::VfioDeviceUnmaskIrq);
1114        }
1115
1116        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1117        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1118        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
1119        irq_set[0].index = irq_index;
1120        irq_set[0].start = 0;
1121        irq_set[0].count = 1;
1122
1123        vfio_syscall::set_device_irqs(self, irq_set.as_slice())
1124            .map_err(|_| VfioError::VfioDeviceUnmaskIrq)
1125    }
1126
1127    /// Wrapper to enable MSI IRQs.
1128    pub fn enable_msi(&self, fds: Vec<&EventFd>) -> Result<()> {
1129        self.enable_irq(VFIO_PCI_MSI_IRQ_INDEX, fds)
1130    }
1131
1132    /// Wrapper to disable MSI IRQs.
1133    pub fn disable_msi(&self) -> Result<()> {
1134        self.disable_irq(VFIO_PCI_MSI_IRQ_INDEX)
1135    }
1136
1137    /// Wrapper to enable MSI-X IRQs.
1138    pub fn enable_msix(&self, fds: Vec<&EventFd>) -> Result<()> {
1139        self.enable_irq(VFIO_PCI_MSIX_IRQ_INDEX, fds)
1140    }
1141
1142    /// Wrapper to disable MSI-X IRQs.
1143    pub fn disable_msix(&self) -> Result<()> {
1144        self.disable_irq(VFIO_PCI_MSIX_IRQ_INDEX)
1145    }
1146
1147    /// Get a region's flag.
1148    ///
1149    /// # Arguments
1150    /// * `index` - The index of memory region.
1151    pub fn get_region_flags(&self, index: u32) -> u32 {
1152        match self.regions.get(index as usize) {
1153            Some(v) => v.flags,
1154            None => 0,
1155        }
1156    }
1157
1158    /// Get a region's offset.
1159    ///
1160    /// # Arguments
1161    /// * `index` - The index of memory region.
1162    pub fn get_region_offset(&self, index: u32) -> u64 {
1163        match self.regions.get(index as usize) {
1164            Some(v) => v.offset,
1165            None => 0,
1166        }
1167    }
1168
1169    /// Get a region's size.
1170    ///
1171    /// # Arguments
1172    /// * `index` - The index of memory region.
1173    pub fn get_region_size(&self, index: u32) -> u64 {
1174        match self.regions.get(index as usize) {
1175            Some(v) => v.size,
1176            None => {
1177                warn!("get_region_size with invalid index: {index}");
1178                0
1179            }
1180        }
1181    }
1182
1183    /// Get region's list of capabilities
1184    ///
1185    /// # Arguments
1186    /// * `index` - The index of memory region.
1187    pub fn get_region_caps(&self, index: u32) -> Vec<VfioRegionInfoCap> {
1188        match self.regions.get(index as usize) {
1189            Some(v) => v.caps.clone(),
1190            None => {
1191                warn!("get_region_caps with invalid index: {index}");
1192                Vec::new()
1193            }
1194        }
1195    }
1196
1197    /// Read region's data from VFIO device into buf
1198    ///
1199    /// # Arguments
1200    /// * `index`: region num
1201    /// * `buf`: data destination and buf length is read size
1202    /// * `addr`: offset in the region
1203    pub fn region_read(&self, index: u32, buf: &mut [u8], addr: u64) {
1204        let region: &VfioRegion = match self.regions.get(index as usize) {
1205            Some(v) => v,
1206            None => {
1207                warn!("region read with invalid index: {index}");
1208                return;
1209            }
1210        };
1211
1212        let size = buf.len() as u64;
1213        if size > region.size || addr + size > region.size {
1214            warn!("region read with invalid parameter, add: {addr}, size: {size}");
1215            return;
1216        }
1217
1218        if let Err(e) = self.device.read_exact_at(buf, region.offset + addr) {
1219            warn!("Failed to read region in index: {index}, addr: {addr}, error: {e}");
1220        }
1221    }
1222
1223    /// Write the data from buf into a vfio device region
1224    ///
1225    /// # Arguments
1226    /// * `index`: region num
1227    /// * `buf`: data src and buf length is write size
1228    /// * `addr`: offset in the region
1229    pub fn region_write(&self, index: u32, buf: &[u8], addr: u64) {
1230        let stub: &VfioRegion = match self.regions.get(index as usize) {
1231            Some(v) => v,
1232            None => {
1233                warn!("region write with invalid index: {index}");
1234                return;
1235            }
1236        };
1237
1238        let size = buf.len() as u64;
1239        if size > stub.size
1240            || addr + size > stub.size
1241            || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1242        {
1243            warn!("region write with invalid parameter, add: {addr}, size: {size}");
1244            return;
1245        }
1246
1247        if let Err(e) = self.device.write_all_at(buf, stub.offset + addr) {
1248            warn!("Failed to write region in index: {index}, addr: {addr}, error: {e}");
1249        }
1250    }
1251
1252    /// Return the maximum numner of interrupts a VFIO device can request.
1253    pub fn max_interrupts(&self) -> u32 {
1254        let mut max_interrupts = 0;
1255        let irq_indexes = vec![
1256            VFIO_PCI_INTX_IRQ_INDEX,
1257            VFIO_PCI_MSI_IRQ_INDEX,
1258            VFIO_PCI_MSIX_IRQ_INDEX,
1259        ];
1260
1261        for index in irq_indexes {
1262            if let Some(irq_info) = self.irqs.get(&index) {
1263                if irq_info.count > max_interrupts {
1264                    max_interrupts = irq_info.count;
1265                }
1266            }
1267        }
1268
1269        max_interrupts
1270    }
1271}
1272
1273impl AsRawFd for VfioDevice {
1274    fn as_raw_fd(&self) -> RawFd {
1275        self.device.as_raw_fd()
1276    }
1277}
1278
1279impl Drop for VfioDevice {
1280    fn drop(&mut self) {
1281        // ManuallyDrop is needed here because we need to ensure that VfioDevice::device is closed
1282        // before dropping VfioDevice::group, otherwise it will cause EBUSY when putting the
1283        // group object.
1284        if let Some(container) = self.vfio_ops.as_any().downcast_ref::<VfioContainer>() {
1285            // SAFETY: we own the File object.
1286            unsafe {
1287                ManuallyDrop::drop(&mut self.device);
1288            }
1289
1290            let group_id = Self::get_group_id_from_path(&self.sysfspath).unwrap();
1291            let group = container.get_group(group_id).unwrap();
1292            container.put_group(group);
1293        }
1294    }
1295}
1296
1297#[cfg(test)]
1298mod tests {
1299    use super::*;
1300    use std::mem::size_of;
1301    use vm_memory::{GuestAddress, GuestMemoryMmap};
1302    use vmm_sys_util::tempfile::TempFile;
1303
1304    impl VfioGroup {
1305        pub(crate) fn open_group_file(id: u32) -> Result<File> {
1306            let tmp_file = TempFile::new().unwrap();
1307            OpenOptions::new()
1308                .read(true)
1309                .write(true)
1310                .open(tmp_file.as_path())
1311                .map_err(|e| VfioError::OpenGroup(e, id.to_string()))
1312        }
1313    }
1314
1315    impl VfioDevice {
1316        pub(crate) fn get_group_id_from_path(_sysfspath: &Path) -> Result<u32> {
1317            Ok(3)
1318        }
1319    }
1320
1321    #[test]
1322    fn test_vfio_region_info_with_cap() {
1323        let reg = vfio_region_info {
1324            argsz: 129,
1325            flags: 0,
1326            index: 5,
1327            cap_offset: 0,
1328            size: 0,
1329            offset: 0,
1330        };
1331        let cap = vfio_region_info_with_cap::from_region_info(&reg);
1332
1333        assert_eq!(size_of::<vfio_region_info>(), 32);
1334        assert_eq!(cap.len(), 5);
1335        assert_eq!(cap[0].region_info.argsz, 129);
1336        assert_eq!(cap[0].region_info.index, 5);
1337
1338        let reg = vfio_region_info_with_cap::default();
1339        assert_eq!(reg.region_info.index, 0);
1340        assert_eq!(reg.region_info.argsz, 0);
1341    }
1342
1343    #[test]
1344    fn test_vfio_device_info() {
1345        let tmp_file = TempFile::new().unwrap();
1346        let device = File::open(tmp_file.as_path()).unwrap();
1347        let dev_info = vfio_syscall::create_dev_info_for_test();
1348        let device_info = VfioDeviceInfo::new(device, &dev_info);
1349
1350        let irqs = device_info.get_irqs().unwrap();
1351        assert_eq!(irqs.len(), 3);
1352        let irq = irqs.get(&0).unwrap();
1353        assert_eq!(irq.flags, VFIO_IRQ_INFO_MASKABLE);
1354        assert_eq!(irq.count, 1);
1355        assert_eq!(irq.index, 0);
1356        let irq = irqs.get(&1).unwrap();
1357        assert_eq!(irq.flags, VFIO_IRQ_INFO_EVENTFD);
1358        assert_eq!(irq.count, 32);
1359        assert_eq!(irq.index, 1);
1360        let irq = irqs.get(&2).unwrap();
1361        assert_eq!(irq.flags, VFIO_IRQ_INFO_EVENTFD);
1362        assert_eq!(irq.count, 2048);
1363        assert_eq!(irq.index, 2);
1364
1365        let regions = device_info.get_regions().unwrap();
1366        assert_eq!(regions.len(), 2);
1367        assert_eq!(regions[0].flags, 0);
1368        assert_eq!(regions[0].offset, 0x10000);
1369        assert_eq!(regions[0].size, 0x1000);
1370        assert_eq!(regions[0].caps.len(), 0);
1371
1372        assert_eq!(regions[1].flags, VFIO_REGION_INFO_FLAG_CAPS);
1373        assert_eq!(regions[1].offset, 0x20000);
1374        assert_eq!(regions[1].size, 0x2000);
1375        assert_eq!(regions[1].caps.len(), 3);
1376        assert_eq!(regions[1].caps[0], VfioRegionInfoCap::MsixMappable);
1377
1378        let ty = &regions[1].caps[1];
1379        if let VfioRegionInfoCap::Type(t) = ty {
1380            assert_eq!(t.type_, 0x5);
1381            assert_eq!(t.subtype, 0x6);
1382        } else {
1383            panic!("expect VfioRegionInfoCapType");
1384        }
1385
1386        let mmap = &regions[1].caps[2];
1387        if let VfioRegionInfoCap::SparseMmap(m) = mmap {
1388            assert_eq!(m.areas.len(), 1);
1389            assert_eq!(m.areas[0].size, 0x3);
1390            assert_eq!(m.areas[0].offset, 0x4);
1391        } else {
1392            panic!("expect VfioRegionInfoCapType");
1393        }
1394    }
1395
1396    fn create_vfio_container() -> VfioContainer {
1397        let tmp_file = TempFile::new().unwrap();
1398        let container = File::open(tmp_file.as_path()).unwrap();
1399
1400        VfioContainer {
1401            container,
1402            common: VfioCommon { device_fd: None },
1403            groups: Mutex::new(HashMap::new()),
1404        }
1405    }
1406
1407    #[test]
1408    fn test_vfio_container() {
1409        let container = create_vfio_container();
1410
1411        assert!(container.as_raw_fd() > 0);
1412        container.check_api_version().unwrap();
1413        container.check_extension(VFIO_TYPE1v2_IOMMU).unwrap();
1414
1415        let group = VfioGroup::new(1).unwrap();
1416        container.device_add_group(&group).unwrap();
1417        container.device_del_group(&group).unwrap();
1418
1419        let group = container.get_group(3).unwrap();
1420        assert_eq!(Arc::strong_count(&group), 2);
1421        assert_eq!(container.groups.lock().unwrap().len(), 1);
1422        let group2 = container.get_group(4).unwrap();
1423        assert_eq!(Arc::strong_count(&group2), 2);
1424        assert_eq!(container.groups.lock().unwrap().len(), 2);
1425
1426        let group3 = container.get_group(3).unwrap();
1427        assert_eq!(Arc::strong_count(&group), 3);
1428        let group4 = container.get_group(3).unwrap();
1429        assert_eq!(Arc::strong_count(&group), 4);
1430        container.put_group(group4);
1431        assert_eq!(Arc::strong_count(&group), 3);
1432        container.put_group(group3);
1433        assert_eq!(Arc::strong_count(&group), 2);
1434        container.put_group(group);
1435
1436        container.vfio_dma_map(0x1000, 0x1000, 0x8000).unwrap();
1437        container.vfio_dma_map(0x2000, 0x2000, 0x8000).unwrap_err();
1438        container.vfio_dma_unmap(0x1000, 0x1000).unwrap();
1439        container.vfio_dma_unmap(0x2000, 0x2000).unwrap_err();
1440    }
1441
1442    #[test]
1443    fn test_vfio_group() {
1444        let group = VfioGroup::new(1).unwrap();
1445        let tmp_file = TempFile::new().unwrap();
1446
1447        assert_eq!(group.id, 1);
1448        assert!(group.as_raw_fd() >= 0);
1449        let device = group.get_device(tmp_file.as_path()).unwrap();
1450        assert_eq!(device.num_irqs, 3);
1451        assert_eq!(device.num_regions, 9);
1452
1453        let regions = device.get_regions().unwrap();
1454        // test code skips VFIO_PCI_VGA_REGION_INDEX
1455        assert_eq!(regions.len(), 8)
1456    }
1457
1458    #[test]
1459    fn test_vfio_device() {
1460        let tmp_file = TempFile::new().unwrap();
1461        let container = Arc::new(create_vfio_container());
1462        let device = VfioDevice::new(tmp_file.as_path(), container.clone()).unwrap();
1463
1464        assert!(device.as_raw_fd() > 0);
1465        assert_eq!(device.max_interrupts(), 2048);
1466
1467        device.reset();
1468        assert_eq!(device.regions.len(), 8);
1469        assert_eq!(device.irqs.len(), 3);
1470
1471        assert!(device.get_irq_info(3).is_none());
1472        let irq = device.get_irq_info(2).unwrap();
1473        assert_eq!(irq.count, 2048);
1474
1475        device.trigger_irq(3, 0).unwrap_err();
1476        device.trigger_irq(2, 2048).unwrap_err();
1477        device.trigger_irq(2, 2047).unwrap();
1478        device.trigger_irq(2, 0).unwrap();
1479
1480        device.enable_irq(3, Vec::new()).unwrap_err();
1481        device.enable_irq(0, Vec::new()).unwrap_err();
1482        device.enable_irq(1, Vec::new()).unwrap();
1483
1484        device.set_irq_resample_fd(1, Vec::new()).unwrap_err();
1485        device.set_irq_resample_fd(0, Vec::new()).unwrap();
1486
1487        device.disable_irq(3).unwrap_err();
1488        device.disable_irq(0).unwrap_err();
1489        device.disable_irq(1).unwrap();
1490
1491        device.unmask_irq(3).unwrap_err();
1492        device.unmask_irq(1).unwrap_err();
1493        device.unmask_irq(0).unwrap();
1494
1495        device.enable_msi(Vec::new()).unwrap();
1496        device.disable_msi().unwrap();
1497        device.enable_msix(Vec::new()).unwrap();
1498        device.disable_msix().unwrap();
1499
1500        assert_eq!(device.get_region_flags(1), VFIO_REGION_INFO_FLAG_CAPS);
1501        assert_eq!(device.get_region_flags(7), 0);
1502        assert_eq!(device.get_region_flags(8), 0);
1503        assert_eq!(device.get_region_offset(1), 0x20000);
1504        assert_eq!(device.get_region_offset(7), 0x80000);
1505        assert_eq!(device.get_region_offset(8), 0);
1506        assert_eq!(device.get_region_size(1), 0x2000);
1507        assert_eq!(device.get_region_size(7), 0x8000);
1508        assert_eq!(device.get_region_size(8), 0);
1509        assert_eq!(device.get_region_caps(1).len(), 3);
1510        assert_eq!(device.get_region_caps(7).len(), 0);
1511        assert_eq!(device.get_region_caps(8).len(), 0);
1512
1513        let mut buf = [0u8; 16];
1514        device.region_read(8, &mut buf, 0x30000);
1515        device.region_read(7, &mut buf, 0x30000);
1516        device.region_read(1, &mut buf, 0x30000);
1517        device.region_write(8, &buf, 0x30000);
1518        device.region_write(7, &buf, 0x30000);
1519        device.region_write(1, &buf, 0x30000);
1520
1521        device.reset();
1522
1523        drop(device);
1524        assert_eq!(container.groups.lock().unwrap().len(), 0);
1525    }
1526
1527    #[test]
1528    #[allow(clippy::redundant_clone)]
1529    fn test_vfio_region_info_cap() {
1530        let v1 = VfioRegionInfoCap::Type(VfioRegionInfoCapType {
1531            type_: 1,
1532            subtype: 1,
1533        });
1534        let v2 = VfioRegionInfoCap::Type(VfioRegionInfoCapType {
1535            type_: 1,
1536            subtype: 2,
1537        });
1538
1539        assert_eq!(v1, v1.clone());
1540        assert_ne!(v1, v2);
1541
1542        let v3 = VfioRegionInfoCap::SparseMmap(VfioRegionInfoCapSparseMmap {
1543            areas: vec![VfioRegionSparseMmapArea { offset: 3, size: 4 }],
1544        });
1545        let v4 = VfioRegionInfoCap::SparseMmap(VfioRegionInfoCapSparseMmap {
1546            areas: vec![VfioRegionSparseMmapArea { offset: 5, size: 6 }],
1547        });
1548        assert_eq!(v3, v3.clone());
1549        assert_ne!(v3, v4);
1550        assert_ne!(v1, v4);
1551        assert_ne!(v1.clone(), v4);
1552
1553        let v5 = VfioRegionInfoCap::MsixMappable;
1554        assert_eq!(v5, v5.clone());
1555        assert_ne!(v5, v1);
1556        assert_ne!(v5, v3);
1557        assert_ne!(v5, v2.clone());
1558        assert_ne!(v5, v4.clone());
1559
1560        let v6 = VfioRegionInfoCap::Nvlink2Lnkspd(VfioRegionInfoCapNvlink2Lnkspd { link_speed: 7 });
1561        let v7 = VfioRegionInfoCap::Nvlink2Lnkspd(VfioRegionInfoCapNvlink2Lnkspd { link_speed: 8 });
1562        assert_eq!(v6, v6.clone());
1563        assert_ne!(v6, v7);
1564        assert_ne!(v6, v1);
1565        assert_ne!(v6, v2.clone());
1566        assert_ne!(v6, v4.clone());
1567
1568        let v8 = VfioRegionInfoCap::Nvlink2Ssatgt(VfioRegionInfoCapNvlink2Ssatgt { tgt: 9 });
1569        let v9 = VfioRegionInfoCap::Nvlink2Ssatgt(VfioRegionInfoCapNvlink2Ssatgt { tgt: 10 });
1570        assert_eq!(v8, v8.clone());
1571        assert_ne!(v8, v9);
1572        assert_ne!(v8, v1);
1573        assert_ne!(v8, v2.clone());
1574        assert_ne!(v8, v4.clone());
1575        assert_ne!(v8, v6.clone());
1576    }
1577
1578    #[test]
1579    fn test_vfio_map_guest_memory() {
1580        let addr1 = GuestAddress(0x1000);
1581        let mem1 = GuestMemoryMmap::<()>::from_ranges(&[(addr1, 0x1000)]).unwrap();
1582        let container = create_vfio_container();
1583
1584        container.vfio_map_guest_memory(&mem1).unwrap();
1585
1586        let addr2 = GuestAddress(0x3000);
1587        let mem2 = GuestMemoryMmap::<()>::from_ranges(&[(addr2, 0x1000)]).unwrap();
1588
1589        container.vfio_unmap_guest_memory(&mem2).unwrap_err();
1590
1591        let addr3 = GuestAddress(0x1000);
1592        let mem3 = GuestMemoryMmap::<()>::from_ranges(&[(addr3, 0x2000)]).unwrap();
1593
1594        container.vfio_unmap_guest_memory(&mem3).unwrap_err();
1595
1596        container.vfio_unmap_guest_memory(&mem1).unwrap();
1597    }
1598
1599    #[test]
1600    fn test_get_device_type() {
1601        let flags: u32 = VFIO_DEVICE_FLAGS_PCI;
1602        assert_eq!(flags, VfioDeviceInfo::get_device_type(&flags));
1603
1604        let flags: u32 = VFIO_DEVICE_FLAGS_PLATFORM;
1605        assert_eq!(flags, VfioDeviceInfo::get_device_type(&flags));
1606
1607        let flags: u32 = VFIO_DEVICE_FLAGS_AMBA;
1608        assert_eq!(flags, VfioDeviceInfo::get_device_type(&flags));
1609
1610        let flags: u32 = VFIO_DEVICE_FLAGS_CCW;
1611        assert_eq!(flags, VfioDeviceInfo::get_device_type(&flags));
1612
1613        let flags: u32 = VFIO_DEVICE_FLAGS_AP;
1614        assert_eq!(flags, VfioDeviceInfo::get_device_type(&flags));
1615    }
1616}