pci_driver/backends/vfio/
mod.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3/* ---------------------------------------------------------------------------------------------- */
4
5// override the crate-level `deny(unsafe_op_in_unsafe_fn)`
6#[cfg_attr(feature = "_unsafe-op-in-unsafe-fn", allow(unsafe_op_in_unsafe_fn))]
7#[allow(
8    dead_code,
9    non_camel_case_types,
10    non_snake_case,
11    non_upper_case_globals
12)]
13mod bindings;
14
15mod containers;
16mod ioctl;
17mod regions;
18
19use libc::{mmap64, munmap, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
20use std::alloc::{self, Layout};
21use std::ffi::CString;
22use std::fmt::Debug;
23use std::fs::File;
24use std::io::{self, ErrorKind};
25use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
26use std::os::unix::prelude::OsStrExt;
27use std::path::Path;
28use std::sync::Arc;
29use std::{mem, ptr};
30
31use crate::backends::vfio::bindings::{
32    __IncompleteArrayField, vfio_device_info, vfio_irq_info, vfio_irq_set, VFIO_DEVICE_FLAGS_PCI,
33    VFIO_IRQ_INFO_EVENTFD, VFIO_IRQ_SET_ACTION_TRIGGER, VFIO_IRQ_SET_DATA_EVENTFD,
34    VFIO_IRQ_SET_DATA_NONE, VFIO_PCI_BAR0_REGION_INDEX, VFIO_PCI_BAR5_REGION_INDEX,
35    VFIO_PCI_CONFIG_REGION_INDEX, VFIO_PCI_INTX_IRQ_INDEX, VFIO_PCI_MSIX_IRQ_INDEX,
36    VFIO_PCI_MSI_IRQ_INDEX, VFIO_PCI_ROM_REGION_INDEX,
37};
38use crate::backends::vfio::ioctl::{
39    vfio_device_get_info, vfio_device_get_irq_info, vfio_device_reset, vfio_device_set_irqs,
40    vfio_group_get_device_fd,
41};
42use crate::backends::vfio::regions::{
43    set_up_bar_or_rom, set_up_config_space, VfioUnmappedPciRegion,
44};
45use crate::config::PciConfig;
46use crate::device::{PciDevice, PciDeviceInternal};
47use crate::interrupts::{PciInterruptKind, PciInterrupts};
48use crate::iommu::PciIommu;
49use crate::regions::{BackedByPciSubregion, OwningPciRegion, Permissions, RegionIdentifier};
50
51pub use containers::VfioContainer;
52
53/* ---------------------------------------------------------------------------------------------- */
54
/// Returns the final component of the canonicalized `device_sysfs_path` as a C string.
///
/// For a device directory such as `/sys/bus/pci/devices/0000:00:01.0` this is the device's PCI
/// address (`0000:00:01.0`). Symbolic links are resolved first, so the result is the name of the
/// directory the path ultimately points to.
///
/// # Errors
///
/// Returns an error if the path cannot be canonicalized (*e.g.*, it does not exist), if the
/// canonical path has no final component (it is the filesystem root), or if the final component
/// contains an interior NUL byte.
fn get_device_address<P: AsRef<Path>>(device_sysfs_path: P) -> io::Result<CString> {
    let path = device_sysfs_path.as_ref().canonicalize()?;

    // A canonical path only lacks a final component when it is the root directory; report that
    // as an error instead of panicking.
    let address = path.file_name().ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!(
                "path {} has no final component to use as a device address",
                path.display()
            ),
        )
    })?;

    CString::new(address.as_bytes())
        .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error))
}
61
/// Returns the number of the IOMMU group to which the device at `device_sysfs_path` belongs.
///
/// The number is obtained by resolving the `iommu_group` entry inside the device's sysfs
/// directory and parsing the final component of the resulting path as a `u32`.
///
/// # Errors
///
/// Returns an error if `<device_sysfs_path>/iommu_group` cannot be canonicalized (*e.g.*, it does
/// not exist), or if the final component of the resolved path is missing, is not valid UTF-8, or
/// does not parse as a `u32`.
fn get_device_group_number<P: AsRef<Path>>(device_sysfs_path: P) -> io::Result<u32> {
    let group_sysfs_path = device_sysfs_path
        .as_ref()
        .join("iommu_group")
        .canonicalize()?;

    // All "cannot extract a number" failures share one error that names the offending path.
    let invalid_group = || {
        io::Error::new(
            ErrorKind::Other,
            format!(
                "cannot determine IOMMU group number from path {}",
                group_sysfs_path.display()
            ),
        )
    };

    group_sysfs_path
        .file_name()
        .and_then(|name| name.to_str())
        .and_then(|name| name.parse::<u32>().ok())
        .ok_or_else(invalid_group)
}
78
79/* ---------------------------------------------------------------------------------------------- */
80
81/// Provides control over a PCI device using VFIO.
82#[derive(Debug)]
83pub struct VfioPciDevice {
84    inner: Arc<VfioPciDeviceInner>,
85}
86
87impl VfioPciDevice {
88    /// Creates a new [`VfioContainer`] containing only the group that contains the given vfio-pci
89    /// device, then calls [`VfioPciDevice::open_in_container`] with the same path and the created
90    /// container.
91    ///
92    /// Note that this only works if no other [`VfioContainer`] already contains the device's group,
93    /// and so you must use [`VfioPciDevice::open_in_container`] if you want to drive several
94    /// devices from the same VFIO group.
95    pub fn open<P: AsRef<Path>>(sysfs_path: P) -> io::Result<VfioPciDevice> {
96        let group_number = get_device_group_number(&sysfs_path)?;
97        let container = Arc::new(VfioContainer::new(&[group_number])?);
98
99        Self::open_in_container(sysfs_path, container)
100    }
101
102    /// Opens a vfio-pci device and adds it to the given container.
103    ///
104    /// `sysfs_path` must correspond to the device's sysfs directory, *e.g.*,
105    /// `/sys/bus/pci/devices/0000:00:01.0`. `container` must contain the group to which the device
106    /// belongs.
107    ///
108    /// Returns a `VfioPciDevice` corresponding to the opened device.
109    pub fn open_in_container<P: AsRef<Path>>(
110        sysfs_path: P,
111        container: Arc<VfioContainer>,
112    ) -> io::Result<VfioPciDevice> {
113        let device_address = get_device_address(&sysfs_path)?;
114        let group_number = get_device_group_number(&sysfs_path)?;
115
116        // get group file
117
118        let group_file = container
119            .groups
120            .get(&group_number)
121            .ok_or_else(|| io::Error::new(ErrorKind::Other, "TODO"))?;
122
123        // get device file
124
125        let device_file = unsafe {
126            let fd = vfio_group_get_device_fd(group_file.as_raw_fd(), device_address.as_ptr())?;
127            Arc::new(File::from_raw_fd(fd))
128        };
129
130        // validate device info
131
132        let mut device_info = vfio_device_info {
133            argsz: mem::size_of::<vfio_device_info>() as u32,
134            flags: 0,
135            num_regions: 0,
136            num_irqs: 0,
137            cap_offset: 0,
138        };
139
140        unsafe { vfio_device_get_info(device_file.as_raw_fd(), &mut device_info)? };
141
142        if device_info.flags & VFIO_DEVICE_FLAGS_PCI == 0
143            || device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
144            || device_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
145        {
146            return Err(io::Error::new(ErrorKind::Other, "TODO"));
147        }
148
149        // get interrupt info
150
151        let get_max_interrupts = |index| {
152            let mut irq_info = vfio_irq_info {
153                argsz: mem::size_of::<vfio_irq_info>() as u32,
154                flags: 0,
155                index,
156                count: 0,
157            };
158
159            unsafe { vfio_device_get_irq_info(device_file.as_raw_fd(), &mut irq_info)? };
160
161            if irq_info.flags & VFIO_IRQ_INFO_EVENTFD == 0 {
162                return Err(io::Error::new(ErrorKind::Other, "TODO"));
163            }
164
165            Ok(irq_info.count as usize)
166        };
167
168        let max_interrupts = [
169            get_max_interrupts(VFIO_PCI_INTX_IRQ_INDEX)?,
170            get_max_interrupts(VFIO_PCI_MSI_IRQ_INDEX)?,
171            get_max_interrupts(VFIO_PCI_MSIX_IRQ_INDEX)?,
172        ];
173
174        // set up config space
175
176        let config_region = set_up_config_space(&device_file)?;
177
178        // set up BARs and ROM
179
180        let bars = (VFIO_PCI_BAR0_REGION_INDEX..=VFIO_PCI_BAR5_REGION_INDEX)
181            .map(|index| set_up_bar_or_rom(&device_file, index))
182            .collect::<io::Result<_>>()?;
183
184        let rom = set_up_bar_or_rom(&device_file, VFIO_PCI_ROM_REGION_INDEX)?;
185
186        // success
187
188        Ok(VfioPciDevice {
189            inner: Arc::new(VfioPciDeviceInner {
190                container,
191                file: device_file,
192                config_region,
193                bars,
194                rom,
195                max_interrupts,
196            }),
197        })
198    }
199
200    /// Returns a reference to the container to which the device's group belongs.
201    pub fn container(&self) -> &Arc<VfioContainer> {
202        &self.inner.container
203    }
204}
205
206impl crate::device::Sealed for VfioPciDevice {}
207impl PciDevice for VfioPciDevice {
208    fn config(&self) -> PciConfig {
209        PciConfig::backed_by(&self.inner.config_region)
210    }
211
212    fn bar(&self, index: usize) -> Option<OwningPciRegion> {
213        let bar = self.inner.bars.get(index)?.as_ref()?;
214
215        Some(OwningPciRegion::new(
216            Arc::<VfioPciDeviceInner>::clone(&self.inner),
217            Arc::<VfioUnmappedPciRegion>::clone(bar),
218            RegionIdentifier::Bar(index),
219            bar.is_mappable(),
220        ))
221    }
222
223    fn rom(&self) -> Option<OwningPciRegion> {
224        let rom = self.inner.rom.as_ref()?;
225
226        Some(OwningPciRegion::new(
227            Arc::<VfioPciDeviceInner>::clone(&self.inner),
228            Arc::<VfioUnmappedPciRegion>::clone(rom),
229            RegionIdentifier::Rom,
230            rom.is_mappable(),
231        ))
232    }
233
234    fn iommu(&self) -> PciIommu {
235        self.inner.container.iommu()
236    }
237
238    fn interrupts(&self) -> PciInterrupts {
239        PciInterrupts {
240            device: &*self.inner,
241        }
242    }
243
244    fn reset(&self) -> io::Result<()> {
245        unsafe { vfio_device_reset(self.inner.file.as_raw_fd())? };
246        Ok(())
247    }
248}
249
250/* ---------------------------------------------------------------------------------------------- */
251
252#[derive(Debug)]
253struct VfioPciDeviceInner {
254    container: Arc<VfioContainer>,
255
256    file: Arc<File>,
257
258    config_region: VfioUnmappedPciRegion,
259    bars: Box<[Option<Arc<VfioUnmappedPciRegion>>]>,
260    rom: Option<Arc<VfioUnmappedPciRegion>>,
261
262    max_interrupts: [usize; 3],
263}
264
265impl PciDeviceInternal for VfioPciDeviceInner {
266    // BARs / ROM
267
268    fn region_map(
269        &self,
270        identifier: RegionIdentifier,
271        offset: u64,
272        len: usize,
273        permissions: Permissions,
274    ) -> io::Result<*mut u8> {
275        let region = match identifier {
276            RegionIdentifier::Bar(index) => &self.bars[index],
277            RegionIdentifier::Rom => &self.rom,
278        };
279
280        let region = region.as_ref().unwrap();
281
282        let prot_flags = match permissions {
283            Permissions::Read => PROT_READ,
284            Permissions::Write => PROT_WRITE,
285            Permissions::ReadWrite => PROT_READ | PROT_WRITE,
286        };
287
288        let address = unsafe {
289            mmap64(
290                ptr::null_mut(),
291                len,
292                prot_flags,
293                MAP_SHARED,
294                self.file.as_raw_fd(),
295                region.offset_in_device_file() as i64 + offset as i64,
296            )
297        };
298
299        if address == MAP_FAILED {
300            Err(io::Error::last_os_error())
301        } else {
302            Ok(address.cast())
303        }
304    }
305
306    unsafe fn region_unmap(&self, _identifier: RegionIdentifier, address: *mut u8, size: usize) {
307        let result = if unsafe { munmap(address.cast(), size) } == 0 {
308            Ok(())
309        } else {
310            Err(io::Error::last_os_error())
311        };
312
313        // TODO: Do something other than crash on failure?
314        result.unwrap();
315    }
316
317    // Interrupts
318
319    fn interrupts_max(&self, kind: PciInterruptKind) -> usize {
320        self.max_interrupts[kind as usize]
321    }
322
323    fn interrupts_enable(&self, kind: PciInterruptKind, eventfds: &[RawFd]) -> io::Result<()> {
324        if eventfds.len() > self.max_interrupts[kind as usize] {
325            return Err(io::Error::new(ErrorKind::Other, "TODO"));
326        }
327
328        // allocate memory for vfio_irq_set
329
330        let eventfds_size = std::mem::size_of_val(eventfds);
331        let total_size = mem::size_of::<vfio_irq_set>() + eventfds_size;
332
333        let layout = Layout::from_size_align(total_size, 4)
334            .map_err(|_| io::Error::new(ErrorKind::Other, "TODO"))?;
335
336        let mem = unsafe { alloc::alloc(layout) };
337
338        if mem.is_null() {
339            alloc::handle_alloc_error(layout);
340        }
341
342        // initialize vfio_irq_set
343
344        let irq_set = mem as *mut vfio_irq_set;
345
346        unsafe {
347            (*irq_set).argsz = total_size as u32;
348            (*irq_set).flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
349            (*irq_set).index = interrupt_index_from_kind(kind);
350            (*irq_set).start = 0;
351            (*irq_set).count = eventfds.len() as u32;
352        }
353
354        let eventfd_mem_iter = unsafe {
355            (*irq_set)
356                .data
357                .as_mut_slice(eventfds_size)
358                .chunks_exact_mut(4)
359        };
360
361        for (mem, eventfd) in eventfd_mem_iter.zip(eventfds) {
362            mem.copy_from_slice(&eventfd.to_ne_bytes());
363        }
364
365        // enable interrupt vectors
366
367        unsafe { vfio_device_set_irqs(self.file.as_raw_fd(), irq_set)? };
368
369        Ok(())
370    }
371
372    fn interrupts_disable(&self, kind: PciInterruptKind) -> io::Result<()> {
373        let irq_set = vfio_irq_set {
374            argsz: mem::size_of::<vfio_irq_set>() as u32,
375            flags: VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
376            index: interrupt_index_from_kind(kind),
377            start: 0,
378            count: 0,
379            data: __IncompleteArrayField::new(),
380        };
381
382        unsafe { vfio_device_set_irqs(self.file.as_raw_fd(), &irq_set)? };
383
384        Ok(())
385    }
386}
387
388fn interrupt_index_from_kind(kind: PciInterruptKind) -> u32 {
389    match kind {
390        PciInterruptKind::Intx => VFIO_PCI_INTX_IRQ_INDEX,
391        PciInterruptKind::Msi => VFIO_PCI_MSI_IRQ_INDEX,
392        PciInterruptKind::MsiX => VFIO_PCI_MSIX_IRQ_INDEX,
393    }
394}
395
396/* ---------------------------------------------------------------------------------------------- */