pci_driver/backends/vfio/
containers.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3/* ---------------------------------------------------------------------------------------------- */
4
5use std::alloc::{self, Layout};
6use std::collections::{BTreeSet, HashMap};
7use std::fmt::Debug;
8use std::fs::{File, OpenOptions};
9use std::io::{self, ErrorKind};
10use std::mem;
11use std::ops::Range;
12use std::os::unix::io::AsRawFd;
13use std::os::unix::prelude::RawFd;
14use std::ptr;
15
16use crate::backends::vfio::bindings::{
17    vfio_group_status, vfio_info_cap_header, vfio_iommu_type1_dma_map, vfio_iommu_type1_dma_unmap,
18    vfio_iommu_type1_info, VFIO_TYPE1v2_IOMMU, __IncompleteArrayField,
19    vfio_iommu_type1_info_cap_iova_range, vfio_iommu_type1_info_dma_avail, vfio_iova_range,
20    VFIO_API_VERSION, VFIO_DMA_MAP_FLAG_READ, VFIO_DMA_MAP_FLAG_WRITE, VFIO_GROUP_FLAGS_VIABLE,
21    VFIO_IOMMU_INFO_PGSIZES, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL,
22};
23use crate::backends::vfio::ioctl::{
24    vfio_check_extension, vfio_get_api_version, vfio_group_get_status, vfio_group_set_container,
25    vfio_iommu_get_info, vfio_iommu_map_dma, vfio_iommu_unmap_dma, vfio_set_iommu,
26};
27use crate::iommu::{PciIommu, PciIommuInternal};
28use crate::regions::Permissions;
29
30/* ---------------------------------------------------------------------------------------------- */
31
32fn open_group(group_number: u32) -> io::Result<File> {
33    // open group
34
35    let file = OpenOptions::new()
36        .read(true)
37        .write(true)
38        .open(format!("/dev/vfio/{}", group_number))?;
39
40    // check if group is viable
41
42    let mut group_status = vfio_group_status {
43        argsz: mem::size_of::<vfio_group_status>() as u32,
44        flags: 0,
45    };
46
47    unsafe { vfio_group_get_status(file.as_raw_fd(), &mut group_status)? };
48
49    if group_status.flags & VFIO_GROUP_FLAGS_VIABLE == 0 {
50        return Err(io::Error::new(
51            ErrorKind::Other,
52            "Group is not viable; are all devices in the group bound to vfio or unbound?",
53        ));
54    }
55
56    // success
57
58    Ok(file)
59}
60
/// IOMMU properties of a VFIO container, as gathered by [`get_iommu_info`].
struct IommuInfo {
    // Required alignment for IOVAs and mapping sizes: the smallest supported IOMMU page
    // size, derived from the lowest set bit of the reported `iova_pgsizes` bitmask.
    iova_alignment: usize,
    // Maximum number of concurrent DMA mappings, from the DMA_AVAIL capability.
    max_num_mappings: u32,
    // Usable IOVA ranges, sorted by start and validated to be non-overlapping.
    valid_iova_ranges: Box<[Range<u64>]>,
}
66
67fn get_iommu_info(device_fd: RawFd) -> io::Result<IommuInfo> {
68    let mut iommu_info = vfio_iommu_type1_info {
69        argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
70        flags: 0,
71        iova_pgsizes: 0,
72        cap_offset: 0,
73    };
74
75    unsafe { vfio_iommu_get_info(device_fd, &mut iommu_info)? };
76
77    // get page size
78
79    if iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES == 0 {
80        return Err(io::Error::new(
81            ErrorKind::Other,
82            "VFIO didn't report IOMMU mapping alignment requirement",
83        ));
84    }
85
86    let iova_alignment = 1usize << iommu_info.iova_pgsizes.trailing_zeros();
87
88    // ensure there are capabilities
89
90    if iommu_info.argsz <= mem::size_of::<vfio_iommu_type1_info>() as u32 {
91        return Err(io::Error::new(
92            ErrorKind::Other,
93            "VFIO reported no IOMMU capabilities",
94        ));
95    }
96
97    // actual vfio_iommu_type1_info struct is bigger, must re-retrieve it with full argsz
98
99    let layout = Layout::from_size_align(iommu_info.argsz as usize, 8)
100        .map_err(|_| io::Error::new(ErrorKind::Other, "TODO"))?;
101
102    let bigger_info = unsafe { alloc::alloc(layout) } as *mut vfio_iommu_type1_info;
103    if bigger_info.is_null() {
104        alloc::handle_alloc_error(layout);
105    }
106
107    unsafe {
108        *bigger_info = vfio_iommu_type1_info {
109            argsz: iommu_info.argsz,
110            flags: 0,
111            iova_pgsizes: 0,
112            cap_offset: 0,
113        };
114    }
115
116    unsafe { vfio_iommu_get_info(device_fd, bigger_info)? };
117
118    let mut ranges = get_iommu_cap_iova_ranges(bigger_info)?;
119
120    // validate and adjust ranges
121
122    ranges.sort_by_key(|r| r.start);
123
124    if !ranges.is_empty() && ranges[0].start == 0 {
125        // First valid IOVA is 0x0, which can cause problems with some protocols or hypervisors.
126        // Make the user's life easier by dropping the first page of IOVA space.
127        ranges[0].start = iova_alignment as u64;
128        if ranges[0].start >= ranges[0].end {
129            ranges.remove(0);
130        }
131    }
132
133    if !ranges.windows(2).all(|r| r[0].end <= r[1].start) {
134        return Err(io::Error::new(
135            ErrorKind::Other,
136            "VFIO reported overlapping IOVA ranges",
137        ));
138    }
139
140    let valid_iova_ranges = ranges.into_boxed_slice();
141
142    let max_num_mappings = get_iommu_dma_avail(bigger_info)?;
143
144    Ok(IommuInfo {
145        iova_alignment,
146        max_num_mappings,
147        valid_iova_ranges,
148    })
149}
150
151fn get_iommu_cap(
152    info: *const vfio_iommu_type1_info,
153    id: u32,
154) -> io::Result<*const vfio_info_cap_header> {
155    let mut offset = unsafe { ptr::addr_of!((*info).cap_offset).read_unaligned() } as usize;
156
157    while offset != 0 {
158        let header = unsafe { info.cast::<u8>().add(offset).cast::<vfio_info_cap_header>() };
159
160        if unsafe { ptr::addr_of!((*header).id).read_unaligned() } as u32 == id {
161            return Ok(header);
162        }
163
164        offset = unsafe { ptr::addr_of!((*header).next).read_unaligned() } as usize;
165    }
166
167    Err(io::Error::new(
168        ErrorKind::Other,
169        format!("VFIO did not provide IOMMU capability with ID {}", id),
170    ))
171}
172
173fn get_iommu_cap_iova_ranges(info: *const vfio_iommu_type1_info) -> io::Result<Vec<Range<u64>>> {
174    let cap = get_iommu_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE)?
175        .cast::<vfio_iommu_type1_info_cap_iova_range>();
176
177    let first_range = unsafe { ptr::addr_of!((*cap).iova_ranges) } as *const vfio_iova_range;
178    let num_ranges = unsafe { ptr::addr_of!((*cap).nr_iovas).read_unaligned() } as usize;
179
180    let ranges = (0..num_ranges)
181        .map(|i| {
182            let range = unsafe { first_range.add(i).read_unaligned() };
183            range.start..range.end
184        })
185        .collect();
186
187    Ok(ranges)
188}
189
190fn get_iommu_dma_avail(info: *const vfio_iommu_type1_info) -> io::Result<u32> {
191    let cap = get_iommu_cap(info, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL)?
192        .cast::<vfio_iommu_type1_info_dma_avail>();
193
194    Ok(unsafe { ptr::addr_of!((*cap).avail).read_unaligned() })
195}
196
197/* ---------------------------------------------------------------------------------------------- */
198
/// A VFIO container representing an IOMMU context that may contain zero or more VFIO groups.
#[derive(Debug)]
pub struct VfioContainer {
    // Open handle to /dev/vfio/vfio; its fd is used for all container-level ioctls.
    file: File,
    // Group numbers in ascending order, without duplicates.
    group_numbers: Box<[u32]>,
    // Open /dev/vfio/<n> file for each group, keyed by group number.
    pub(crate) groups: HashMap<u32, File>,
    // IOMMU properties cached at construction time (see IommuInfo).
    iommu_iova_alignment: usize,
    iommu_max_num_mappings: u32,
    iommu_valid_iova_ranges: Box<[Range<u64>]>,
}
209
210impl VfioContainer {
211    /// Creates a new, empty [`VfioContainer`].
212    ///
213    /// This fails if not all devices in all given groups have been bound to vfio-pci (the VFIO docs
214    /// say "it's also sufficient to only unbind the device from host drivers if a VFIO driver is
215    /// unavailable").
216    ///
217    /// This fails if any of the groups is already open elsewhere, for instance if another
218    /// [`VfioContainer`] containing one of the groups already currently exists.
219    pub fn new(groups: &[u32]) -> io::Result<VfioContainer> {
220        // open groups
221
222        let group_numbers = Vec::from(groups)
223            .into_iter()
224            .collect::<BTreeSet<_>>()
225            .into_iter()
226            .collect::<Box<[_]>>();
227
228        let groups: HashMap<_, _> = group_numbers
229            .iter()
230            .map(|&n| Ok((n, open_group(n)?)))
231            .collect::<io::Result<_>>()?;
232
233        // create container
234
235        let file = OpenOptions::new()
236            .read(true)
237            .write(true)
238            .open("/dev/vfio/vfio")?;
239
240        let fd = file.as_raw_fd();
241
242        // check API version
243
244        if unsafe { vfio_get_api_version(fd)? } != VFIO_API_VERSION as i32 {
245            return Err(io::Error::new(
246                ErrorKind::InvalidInput,
247                "Wrong VFIO_API_VERSION",
248            ));
249        }
250
251        // check extension
252
253        if unsafe { vfio_check_extension(fd, VFIO_TYPE1v2_IOMMU as usize)? } != 1 {
254            return Err(io::Error::new(ErrorKind::InvalidInput, "TODO"));
255        }
256
257        // add groups to container
258
259        for group_file in groups.values() {
260            unsafe { vfio_group_set_container(group_file.as_raw_fd(), &fd)? };
261        }
262
263        // enable IOMMU
264
265        unsafe { vfio_set_iommu(fd, VFIO_TYPE1v2_IOMMU as usize)? };
266
267        // get IOMMU info
268
269        let iommu_info = get_iommu_info(fd)?;
270
271        // success
272
273        Ok(VfioContainer {
274            file,
275            group_numbers,
276            groups,
277            iommu_iova_alignment: iommu_info.iova_alignment,
278            iommu_max_num_mappings: iommu_info.max_num_mappings,
279            iommu_valid_iova_ranges: iommu_info.valid_iova_ranges,
280        })
281    }
282
283    /// The group numbers of the groups this container contains.
284    ///
285    /// In ascending order, without duplicates.
286    pub fn groups(&self) -> &[u32] {
287        &self.group_numbers
288    }
289
290    /// Returns a thing that lets you manage IOMMU mappings for DMA for all devices in all groups
291    /// that belong to this container.
292    pub fn iommu(&self) -> PciIommu {
293        PciIommu { internal: self }
294    }
295
296    /// Tries to reset all the PCI functions in all the VFIO groups that `self` refers to.
297    ///
298    /// This requires that the user has "ownership" over all the affected functions / permissions to
299    /// do it.
300    ///
301    /// TODO: Reset granularity might not match container granularity. Will probably need to expose
302    /// reset topology properly eventually.
303    ///
304    /// TODO: Should probably advertise whether this granularity of reset is supported, so the user
305    /// doesn't have to try resetting to find out.
306    pub fn reset(&self) -> io::Result<()> {
307        // TODO: Implement.
308        Err(io::Error::new(ErrorKind::Other, "not yet implemented"))
309    }
310}
311
312impl PciIommuInternal for VfioContainer {
313    fn alignment(&self) -> usize {
314        self.iommu_iova_alignment
315    }
316
317    fn valid_iova_ranges(&self) -> &[Range<u64>] {
318        &self.iommu_valid_iova_ranges
319    }
320
321    fn max_num_mappings(&self) -> u32 {
322        self.iommu_max_num_mappings
323    }
324
325    unsafe fn map(
326        &self,
327        iova: u64,
328        size: usize,
329        address: *const u8,
330        device_permissions: Permissions,
331    ) -> io::Result<()> {
332        // map region
333
334        let flags = match device_permissions {
335            Permissions::Read => VFIO_DMA_MAP_FLAG_READ,
336            Permissions::Write => VFIO_DMA_MAP_FLAG_WRITE,
337            Permissions::ReadWrite => VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
338        };
339
340        let dma_map = vfio_iommu_type1_dma_map {
341            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
342            flags,
343            vaddr: address as u64,
344            iova,
345            size: size as u64,
346        };
347
348        unsafe { vfio_iommu_map_dma(self.file.as_raw_fd(), &dma_map) }.map_err(|e| {
349            io::Error::new(
350                ErrorKind::Other,
351                format!(
352                    "Failed to set up IOMMU mapping process memory [{:#x}, {:#x}) to device \
353                    memory [{:#x}, {:#x}): {}",
354                    address as usize,
355                    address as usize + size,
356                    iova,
357                    iova + size as u64,
358                    e
359                ),
360            )
361        })?;
362
363        // success
364
365        Ok(())
366    }
367
368    fn unmap(&self, iova: u64, size: usize) -> io::Result<()> {
369        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
370            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
371            flags: 0,
372            iova,
373            size: size as u64,
374            data: __IncompleteArrayField::new(),
375        };
376
377        unsafe { vfio_iommu_unmap_dma(self.file.as_raw_fd(), &mut dma_unmap)? };
378
379        Ok(())
380    }
381}
382
383/* ---------------------------------------------------------------------------------------------- */