Skip to main content

kevy_uring/
pbr.rs

1//! Provided-buffer ring — the destination pool for multishot `recv`. The
2//! kernel draws a buffer per arrival and reports its id; the app recycles
3//! the buffer once the bytes are copied out.
4
5use core::ffi::{c_int, c_long, c_void};
6use core::ptr;
7use core::sync::atomic::{AtomicU16, Ordering};
8use std::io;
9
10use crate::ffi::{
11    self, IO_URING_BUF_SIZE, IO_URING_BUF_TAIL_OFF, IORING_REGISTER_PBUF_RING,
12    IORING_UNREGISTER_PBUF_RING, MAP_ANONYMOUS, MAP_PRIVATE, PROT_READ, PROT_WRITE,
13    SYS_IO_URING_REGISTER,
14};
15use crate::layout::IoUringBufReg;
16
/// A registered provided-buffer ring (the destination pool for multishot
/// [`recv`](crate::IoUring::prep_recv_multishot)). Owns the buf-ring mapping
/// and the backing slab; the kernel fills a buffer per arrival, the app
/// recycles it.
///
/// Dropping the value unregisters the buffer group (best effort) and unmaps
/// the ring.
pub struct ProvidedBufRing {
    /// io_uring file descriptor the ring was registered on; reused for the
    /// unregister call on drop.
    pub(crate) ring_fd: c_int,
    /// Base of the anonymous mapping that holds the ring slots
    /// (`IO_URING_BUF_SIZE` bytes each). The shared tail lives at
    /// `IO_URING_BUF_TAIL_OFF` from this base.
    pub(crate) ring: *mut u8,
    /// Length of the `ring` mapping in bytes (`entries * IO_URING_BUF_SIZE`);
    /// passed back to `munmap` on drop.
    pub(crate) ring_len: usize,
    /// Contiguous backing store; buffer `bid` is `slab[bid*buf_size ..][..n]`.
    /// Never resized, so the addresses published into the ring stay valid.
    pub(crate) slab: Vec<u8>,
    /// `entries - 1`; `entries` is a power of two, so `tail & mask` is the
    /// slot index for the next staged buffer.
    pub(crate) mask: u16,
    /// Size in bytes of every buffer in `slab`; written as the length of each
    /// ring entry.
    pub(crate) buf_size: u32,
    /// Buffer group id the ring was registered under.
    pub(crate) bgid: u16,
    /// Local producer cursor (published to the kernel by [`Self::commit`]).
    pub(crate) tail: u16,
}
34
// The raw `ring` pointer suppresses the automatic `Send` impl, so it is
// asserted by hand here.
// SAFETY: like `IoUring`, a single owner per shard thread; not `Sync`. The
// pointer refers to a private anonymous mapping that this type creates and
// unmaps itself, so moving the value to another thread moves the only handle
// to that mapping along with it.
unsafe impl Send for ProvidedBufRing {}
37
38impl ProvidedBufRing {
39    /// Allocate a provided-buffer ring and register it with the kernel under
40    /// `bgid`. Called from [`IoUring::register_buf_ring`](crate::IoUring::register_buf_ring).
41    pub(crate) fn new(
42        ring_fd: c_int,
43        entries: u16,
44        buf_size: u32,
45        bgid: u16,
46    ) -> io::Result<Self> {
47        assert!(entries.is_power_of_two(), "buf ring entries must be power of two");
48        let (ring, ring_len) = Self::mmap_buf_ring(entries)?;
49        if let Err(e) = Self::register_with_kernel(ring_fd, ring, entries, bgid) {
50            // Unmap on failure so we don't leak the page.
51            // SAFETY: `ring`/`ring_len` are the pair `mmap` just returned to us.
52            unsafe { ffi::munmap(ring as *mut c_void, ring_len) };
53            return Err(e);
54        }
55        let mut pbr = ProvidedBufRing {
56            ring_fd,
57            ring,
58            ring_len,
59            slab: vec![0u8; entries as usize * buf_size as usize],
60            mask: entries - 1,
61            buf_size,
62            bgid,
63            tail: 0,
64        };
65        // Publish all buffers so the first recvs have somewhere to land.
66        for bid in 0..entries {
67            pbr.stage(bid);
68        }
69        pbr.commit();
70        Ok(pbr)
71    }
72
73    /// Allocate the `entries × io_uring_buf` page-aligned region.
74    fn mmap_buf_ring(entries: u16) -> io::Result<(*mut u8, usize)> {
75        let ring_len = entries as usize * IO_URING_BUF_SIZE;
76        // SAFETY: anonymous mmap with valid PROT/flag bitset and length > 0.
77        let ring = unsafe {
78            ffi::mmap(
79                ptr::null_mut(),
80                ring_len,
81                PROT_READ | PROT_WRITE,
82                MAP_ANONYMOUS | MAP_PRIVATE,
83                -1,
84                0,
85            )
86        };
87        if ring as isize == -1 {
88            return Err(io::Error::last_os_error());
89        }
90        Ok((ring as *mut u8, ring_len))
91    }
92
93    /// Tell the kernel about a newly allocated buf ring under `bgid`.
94    fn register_with_kernel(ring_fd: c_int, ring: *mut u8, entries: u16, bgid: u16) -> io::Result<()> {
95        let reg = IoUringBufReg {
96            ring_addr: ring as u64,
97            ring_entries: entries as u32,
98            bgid,
99            pad: 0,
100            resv: [0; 3],
101        };
102        // SAFETY: `reg` lives through the syscall; ring_fd is a valid io_uring fd.
103        let ret = unsafe {
104            ffi::syscall(
105                SYS_IO_URING_REGISTER,
106                ring_fd as c_long,
107                IORING_REGISTER_PBUF_RING as c_long,
108                &reg as *const IoUringBufReg as c_long,
109                1 as c_long,
110            )
111        };
112        if ret < 0 {
113            return Err(io::Error::last_os_error());
114        }
115        Ok(())
116    }
117
118    /// The buffer group id this ring serves (pass to `prep_recv_multishot`).
119    pub fn group(&self) -> u16 {
120        self.bgid
121    }
122
123    /// The `n` valid bytes the kernel placed in buffer `bid` (n = completion `res`).
124    pub fn bytes(&self, bid: u16, n: usize) -> &[u8] {
125        let start = bid as usize * self.buf_size as usize;
126        &self.slab[start..start + n.min(self.buf_size as usize)]
127    }
128
129    /// Place buffer `bid` at the current tail slot (without publishing). Writes
130    /// only addr/len/bid (offsets 0..14), never the tail at offset 14.
131    pub(crate) fn stage(&mut self, bid: u16) {
132        let idx = (self.tail & self.mask) as usize;
133        // SAFETY: `idx` is masked into [0, entries), so `base` is in-bounds.
134        let base = unsafe { self.ring.add(idx * IO_URING_BUF_SIZE) };
135        // SAFETY: `bid` * buf_size is < slab.len() by construction.
136        let addr = unsafe { self.slab.as_ptr().add(bid as usize * self.buf_size as usize) } as u64;
137        // SAFETY: `base` is an in-bounds 16-byte slot; fields are unaligned-safe.
138        unsafe {
139            ptr::write_unaligned(base as *mut u64, addr);
140            ptr::write_unaligned(base.add(8) as *mut u32, self.buf_size);
141            ptr::write_unaligned(base.add(12) as *mut u16, bid);
142        }
143        self.tail = self.tail.wrapping_add(1);
144    }
145
146    /// Publish the staged buffers to the kernel (store-release on the ring tail).
147    pub(crate) fn commit(&self) {
148        // SAFETY: `ring + IO_URING_BUF_TAIL_OFF` is a 2-byte slot inside the
149        // mapping (offset 14 of slot 0 — see IO_URING_BUF_TAIL_OFF doc).
150        let tail = unsafe { &*(self.ring.add(IO_URING_BUF_TAIL_OFF) as *const AtomicU16) };
151        tail.store(self.tail, Ordering::Release);
152    }
153
154    /// Return buffer `bid` to the ring so the kernel can reuse it. Call once the
155    /// bytes from its completion have been copied out.
156    pub fn recycle(&mut self, bid: u16) {
157        self.stage(bid);
158        self.commit();
159    }
160}
161
162impl Drop for ProvidedBufRing {
163    fn drop(&mut self) {
164        // Best-effort unregister (EBADF if the ring fd is already closed — fine),
165        // then unmap. The slab Vec frees itself.
166        let reg = IoUringBufReg {
167            ring_addr: 0,
168            ring_entries: 0,
169            bgid: self.bgid,
170            pad: 0,
171            resv: [0; 3],
172        };
173        // SAFETY: kernel-side cleanup; the mapping is ours to free.
174        unsafe {
175            ffi::syscall(
176                SYS_IO_URING_REGISTER,
177                self.ring_fd as c_long,
178                IORING_UNREGISTER_PBUF_RING as c_long,
179                &reg as *const IoUringBufReg as c_long,
180                1 as c_long,
181            );
182            ffi::munmap(self.ring as *mut c_void, self.ring_len);
183        }
184    }
185}