io_uring/
types.rs

1//! Common Linux types not provided by libc.
2
3pub(crate) mod sealed {
4    use super::{Fd, Fixed};
5    use std::os::unix::io::RawFd;
6
7    #[derive(Debug)]
8    pub enum Target {
9        Fd(RawFd),
10        Fixed(u32),
11    }
12
13    pub trait UseFd: Sized {
14        fn into(self) -> RawFd;
15    }
16
17    pub trait UseFixed: Sized {
18        fn into(self) -> Target;
19    }
20
21    impl UseFd for Fd {
22        #[inline]
23        fn into(self) -> RawFd {
24            self.0
25        }
26    }
27
28    impl UseFixed for Fd {
29        #[inline]
30        fn into(self) -> Target {
31            Target::Fd(self.0)
32        }
33    }
34
35    impl UseFixed for Fixed {
36        #[inline]
37        fn into(self) -> Target {
38            Target::Fixed(self.0)
39        }
40    }
41}
42
43use crate::sys;
44use crate::util::{cast_ptr, unwrap_nonzero, unwrap_u32};
45use bitflags::bitflags;
46use std::convert::TryFrom;
47use std::marker::PhantomData;
48use std::num::NonZeroU32;
49use std::os::unix::io::RawFd;
50
51pub use sys::__kernel_rwf_t as RwFlags;
52
/// Opaque placeholder type; you should use [`statx`](struct@libc::statx) instead.
#[allow(non_camel_case_types)]
#[repr(C)]
pub struct statx {
    // Private unit field: the type cannot be constructed outside this module.
    _priv: (),
}
59
/// Opaque placeholder type; you should use [`epoll_event`](libc::epoll_event) instead.
#[allow(non_camel_case_types)]
#[repr(C)]
pub struct epoll_event {
    // Private unit field: the type cannot be constructed outside this module.
    _priv: (),
}
66
/// A plain file descriptor, i.e. one that has not been registered with io_uring.
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub struct Fd(pub RawFd);
71
/// A file descriptor registered with io_uring through
/// [`Submitter::register_files`](crate::Submitter::register_files) or
/// [`Submitter::register_files_sparse`](crate::Submitter::register_files_sparse).
///
/// In some cases this reduces overhead compared to using [`Fd`].
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub struct Fixed(pub u32);
78
79bitflags! {
80    /// Options for [`Timeout`](super::Timeout).
81    ///
82    /// The default behavior is to treat the timespec as a relative time interval. `flags` may
83    /// contain [`types::TimeoutFlags::ABS`] to indicate the timespec represents an absolute
84    /// time. When an absolute time is being specified, the kernel will use its monotonic clock
85    /// unless one of the following flags is set (they may not both be set):
86    /// [`types::TimeoutFlags::BOOTTIME`] or [`types::TimeoutFlags::REALTIME`].
87    ///
88    /// The default behavior when the timeout expires is to return a CQE with -libc::ETIME in
89    /// the res field. To change this behavior to have zero returned, include
90    /// [`types::TimeoutFlags::ETIME_SUCCESS`].
91    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
92    pub struct TimeoutFlags: u32 {
93        const ABS = sys::IORING_TIMEOUT_ABS;
94
95        const BOOTTIME = sys::IORING_TIMEOUT_BOOTTIME;
96
97        const REALTIME = sys::IORING_TIMEOUT_REALTIME;
98
99        const LINK_TIMEOUT_UPDATE = sys::IORING_LINK_TIMEOUT_UPDATE;
100
101        const ETIME_SUCCESS = sys::IORING_TIMEOUT_ETIME_SUCCESS;
102
103        const MULTISHOT = sys::IORING_TIMEOUT_MULTISHOT;
104    }
105}
106
107bitflags! {
108    /// Options for [`Fsync`](super::Fsync).
109    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
110    pub struct FsyncFlags: u32 {
111        const DATASYNC = sys::IORING_FSYNC_DATASYNC;
112    }
113}
114
115bitflags! {
116    /// Options for [`AsyncCancel`](super::AsyncCancel) and
117    /// [`Submitter::register_sync_cancel`](super::Submitter::register_sync_cancel).
118    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
119    pub(crate) struct AsyncCancelFlags: u32 {
120        /// Cancel all requests that match the given criteria, rather
121        /// than just canceling the first one found.
122        ///
123        /// Available since 5.19.
124        const ALL = sys::IORING_ASYNC_CANCEL_ALL;
125
126        /// Match based on the file descriptor used in the original
127        /// request rather than the user_data.
128        ///
129        /// Available since 5.19.
130        const FD = sys::IORING_ASYNC_CANCEL_FD;
131
132        /// Match any request in the ring, regardless of user_data or
133        /// file descriptor.  Can be used to cancel any pending
134        /// request in the ring.
135        ///
136        /// Available since 5.19.
137        const ANY = sys::IORING_ASYNC_CANCEL_ANY;
138
139        /// Match based on the fixed file descriptor used in the original
140        /// request rather than the user_data.
141        ///
142        /// Available since 6.0
143        const FD_FIXED = sys::IORING_ASYNC_CANCEL_FD_FIXED;
144    }
145}
146
147/// Wrapper around `open_how` as used in [the `openat2(2)` system
148/// call](https://man7.org/linux/man-pages/man2/openat2.2.html).
149#[derive(Default, Debug, Clone, Copy)]
150#[repr(transparent)]
151pub struct OpenHow(sys::open_how);
152
153impl OpenHow {
154    pub const fn new() -> Self {
155        OpenHow(sys::open_how {
156            flags: 0,
157            mode: 0,
158            resolve: 0,
159        })
160    }
161
162    pub const fn flags(mut self, flags: u64) -> Self {
163        self.0.flags = flags;
164        self
165    }
166
167    pub const fn mode(mut self, mode: u64) -> Self {
168        self.0.mode = mode;
169        self
170    }
171
172    pub const fn resolve(mut self, resolve: u64) -> Self {
173        self.0.resolve = resolve;
174        self
175    }
176}
177
178#[derive(Default, Debug, Clone, Copy)]
179#[repr(transparent)]
180pub struct Timespec(pub(crate) sys::__kernel_timespec);
181
182impl Timespec {
183    #[inline]
184    pub const fn new() -> Self {
185        Timespec(sys::__kernel_timespec {
186            tv_sec: 0,
187            tv_nsec: 0,
188        })
189    }
190
191    #[inline]
192    pub const fn sec(mut self, sec: u64) -> Self {
193        self.0.tv_sec = sec as _;
194        self
195    }
196
197    #[inline]
198    pub const fn nsec(mut self, nsec: u32) -> Self {
199        self.0.tv_nsec = nsec as _;
200        self
201    }
202}
203
204impl From<std::time::Duration> for Timespec {
205    fn from(value: std::time::Duration) -> Self {
206        Timespec::new()
207            .sec(value.as_secs())
208            .nsec(value.subsec_nanos())
209    }
210}
211
212/// Submit arguments
213///
214/// Note that arguments that exceed their lifetime will fail to compile.
215///
216/// ```compile_fail
217/// use io_uring::types::{ SubmitArgs, Timespec };
218///
219/// let sigmask: libc::sigset_t = unsafe { std::mem::zeroed() };
220///
221/// let mut args = SubmitArgs::new();
222///
223/// {
224///     let ts = Timespec::new();
225///     args = args.timespec(&ts);
226///     args = args.sigmask(&sigmask);
227/// }
228///
229/// drop(args);
230/// ```
231#[derive(Default, Debug, Clone, Copy)]
232pub struct SubmitArgs<'prev: 'now, 'now> {
233    pub(crate) args: sys::io_uring_getevents_arg,
234    prev: PhantomData<&'prev ()>,
235    now: PhantomData<&'now ()>,
236}
237
238impl<'prev, 'now> SubmitArgs<'prev, 'now> {
239    #[inline]
240    pub const fn new() -> SubmitArgs<'static, 'static> {
241        let args = sys::io_uring_getevents_arg {
242            sigmask: 0,
243            sigmask_sz: 0,
244            min_wait_usec: 0,
245            ts: 0,
246        };
247
248        SubmitArgs {
249            args,
250            prev: PhantomData,
251            now: PhantomData,
252        }
253    }
254
255    #[inline]
256    pub fn sigmask<'new>(mut self, sigmask: &'new libc::sigset_t) -> SubmitArgs<'now, 'new> {
257        self.args.sigmask = cast_ptr(sigmask) as _;
258        self.args.sigmask_sz = std::mem::size_of::<libc::sigset_t>() as _;
259
260        SubmitArgs {
261            args: self.args,
262            prev: self.now,
263            now: PhantomData,
264        }
265    }
266
267    #[inline]
268    pub fn timespec<'new>(mut self, timespec: &'new Timespec) -> SubmitArgs<'now, 'new> {
269        self.args.ts = cast_ptr(timespec) as _;
270
271        SubmitArgs {
272            args: self.args,
273            prev: self.now,
274            now: PhantomData,
275        }
276    }
277}
278
279#[repr(transparent)]
280pub struct BufRingEntry(sys::io_uring_buf);
281
282/// An entry in a buf_ring that allows setting the address, length and buffer id.
283#[allow(clippy::len_without_is_empty)]
284impl BufRingEntry {
285    /// Sets the entry addr.
286    pub fn set_addr(&mut self, addr: u64) {
287        self.0.addr = addr;
288    }
289
290    /// Returns the entry addr.
291    pub fn addr(&self) -> u64 {
292        self.0.addr
293    }
294
295    /// Sets the entry len.
296    pub fn set_len(&mut self, len: u32) {
297        self.0.len = len;
298    }
299
300    /// Returns the entry len.
301    pub fn len(&self) -> u32 {
302        self.0.len
303    }
304
305    /// Sets the entry bid.
306    pub fn set_bid(&mut self, bid: u16) {
307        self.0.bid = bid;
308    }
309
310    /// Returns the entry bid.
311    pub fn bid(&self) -> u16 {
312        self.0.bid
313    }
314
315    /// The offset to the ring's tail field given the ring's base address.
316    ///
317    /// The caller should ensure the ring's base address is aligned with the system's page size,
318    /// per the uring interface requirements.
319    ///
320    /// # Safety
321    ///
322    /// The ptr will be dereferenced in order to determine the address of the resv field,
323    /// so the caller is responsible for passing in a valid pointer. And not just
324    /// a valid pointer type, but also the argument must be the address to the first entry
325    /// of the buf_ring for the resv field to even be considered the tail field of the ring.
326    /// The entry must also be properly initialized.
327    pub unsafe fn tail(ring_base: *const BufRingEntry) -> *const u16 {
328        std::ptr::addr_of!((*ring_base).0.resv)
329    }
330}
331
332/// A destination slot for sending fixed resources
333/// (e.g. [`opcode::MsgRingSendFd`](crate::opcode::MsgRingSendFd)).
334#[derive(Debug, Clone, Copy)]
335pub struct DestinationSlot {
336    /// Fixed slot as indexed by the kernel (target+1).
337    dest: NonZeroU32,
338}
339
340impl DestinationSlot {
341    // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always > 0.
342    const AUTO_ALLOC: NonZeroU32 =
343        unwrap_nonzero(NonZeroU32::new(sys::IORING_FILE_INDEX_ALLOC as u32));
344
345    /// Use an automatically allocated target slot.
346    pub const fn auto_target() -> Self {
347        Self {
348            dest: DestinationSlot::AUTO_ALLOC,
349        }
350    }
351
352    /// Try to use a given target slot.
353    ///
354    /// Valid slots are in the range from `0` to `u32::MAX - 2` inclusive.
355    pub fn try_from_slot_target(target: u32) -> Result<Self, u32> {
356        // SAFETY: kernel constant, `IORING_FILE_INDEX_ALLOC` is always >= 2.
357        const MAX_INDEX: u32 = unwrap_u32(DestinationSlot::AUTO_ALLOC.get().checked_sub(2));
358
359        if target > MAX_INDEX {
360            return Err(target);
361        }
362
363        let kernel_index = target.saturating_add(1);
364        // SAFETY: by construction, always clamped between 1 and IORING_FILE_INDEX_ALLOC-1.
365        debug_assert!(0 < kernel_index && kernel_index < DestinationSlot::AUTO_ALLOC.get());
366        let dest = NonZeroU32::new(kernel_index).unwrap();
367
368        Ok(Self { dest })
369    }
370
371    pub(crate) fn kernel_index_arg(&self) -> u32 {
372        self.dest.get()
373    }
374}
375
376/// Helper structure for parsing the result of a multishot [`opcode::RecvMsg`](crate::opcode::RecvMsg).
377#[derive(Debug)]
378pub struct RecvMsgOut<'buf> {
379    header: sys::io_uring_recvmsg_out,
380    /// The fixed length of the name field, in bytes.
381    ///
382    /// If the incoming name data is larger than this, it gets truncated to this.
383    /// If it is smaller, it gets 0-padded to fill the whole field. In either case,
384    /// this fixed amount of space is reserved in the result buffer.
385    msghdr_name_len: usize,
386
387    name_data: &'buf [u8],
388    control_data: &'buf [u8],
389    payload_data: &'buf [u8],
390}
391
392impl<'buf> RecvMsgOut<'buf> {
393    const DATA_START: usize = std::mem::size_of::<sys::io_uring_recvmsg_out>();
394
395    /// Parse the data buffered upon completion of a `RecvMsg` multishot operation.
396    ///
397    /// `buffer` is the whole buffer previously provided to the ring, while `msghdr`
398    /// is the same content provided as input to the corresponding SQE
399    /// (only `msg_namelen` and `msg_controllen` fields are relevant).
400    #[allow(clippy::result_unit_err)]
401    #[allow(clippy::useless_conversion)]
402    pub fn parse(buffer: &'buf [u8], msghdr: &libc::msghdr) -> Result<Self, ()> {
403        let msghdr_name_len = usize::try_from(msghdr.msg_namelen).unwrap();
404        let msghdr_control_len = usize::try_from(msghdr.msg_controllen).unwrap();
405
406        if Self::DATA_START
407            .checked_add(msghdr_name_len)
408            .and_then(|acc| acc.checked_add(msghdr_control_len))
409            .map(|header_len| buffer.len() < header_len)
410            .unwrap_or(true)
411        {
412            return Err(());
413        }
414        // SAFETY: buffer (minimum) length is checked here above.
415        let header = unsafe {
416            buffer
417                .as_ptr()
418                .cast::<sys::io_uring_recvmsg_out>()
419                .read_unaligned()
420        };
421
422        // min is used because the header may indicate the true size of the data
423        // while what we received was truncated.
424        let (name_data, control_start) = {
425            let name_start = Self::DATA_START;
426            let name_data_end =
427                name_start + usize::min(usize::try_from(header.namelen).unwrap(), msghdr_name_len);
428            let name_field_end = name_start + msghdr_name_len;
429            (&buffer[name_start..name_data_end], name_field_end)
430        };
431        let (control_data, payload_start) = {
432            let control_data_end = control_start
433                + usize::min(
434                    usize::try_from(header.controllen).unwrap(),
435                    msghdr_control_len,
436                );
437            let control_field_end = control_start + msghdr_control_len;
438            (&buffer[control_start..control_data_end], control_field_end)
439        };
440        let payload_data = {
441            let payload_data_end = payload_start
442                + usize::min(
443                    usize::try_from(header.payloadlen).unwrap(),
444                    buffer.len() - payload_start,
445                );
446            &buffer[payload_start..payload_data_end]
447        };
448
449        Ok(Self {
450            header,
451            msghdr_name_len,
452            name_data,
453            control_data,
454            payload_data,
455        })
456    }
457
458    /// Return the length of the incoming `name` data.
459    ///
460    /// This may be larger than the size of the content returned by
461    /// `name_data()`, if the kernel could not fit all the incoming
462    /// data in the provided buffer size. In that case, name data in
463    /// the result buffer gets truncated.
464    pub fn incoming_name_len(&self) -> u32 {
465        self.header.namelen
466    }
467
468    /// Return whether the incoming name data was larger than the provided limit/buffer.
469    ///
470    /// When `true`, data returned by `name_data()` is truncated and
471    /// incomplete.
472    pub fn is_name_data_truncated(&self) -> bool {
473        self.header.namelen as usize > self.msghdr_name_len
474    }
475
476    /// Message control data, with the same semantics as `msghdr.msg_control`.
477    pub fn name_data(&self) -> &[u8] {
478        self.name_data
479    }
480
481    /// Return the length of the incoming `control` data.
482    ///
483    /// This may be larger than the size of the content returned by
484    /// `control_data()`, if the kernel could not fit all the incoming
485    /// data in the provided buffer size. In that case, control data in
486    /// the result buffer gets truncated.
487    pub fn incoming_control_len(&self) -> u32 {
488        self.header.controllen
489    }
490
491    /// Return whether the incoming control data was larger than the provided limit/buffer.
492    ///
493    /// When `true`, data returned by `control_data()` is truncated and
494    /// incomplete.
495    pub fn is_control_data_truncated(&self) -> bool {
496        (self.header.flags & u32::try_from(libc::MSG_CTRUNC).unwrap()) != 0
497    }
498
499    /// Message control data, with the same semantics as `msghdr.msg_control`.
500    pub fn control_data(&self) -> &[u8] {
501        self.control_data
502    }
503
504    /// Return whether the incoming payload was larger than the provided limit/buffer.
505    ///
506    /// When `true`, data returned by `payload_data()` is truncated and
507    /// incomplete.
508    pub fn is_payload_truncated(&self) -> bool {
509        (self.header.flags & u32::try_from(libc::MSG_TRUNC).unwrap()) != 0
510    }
511
512    /// Message payload, as buffered by the kernel.
513    pub fn payload_data(&self) -> &[u8] {
514        self.payload_data
515    }
516
517    /// Return the length of the incoming `payload` data.
518    ///
519    /// This may be larger than the size of the content returned by
520    /// `payload_data()`, if the kernel could not fit all the incoming
521    /// data in the provided buffer size. In that case, payload data in
522    /// the result buffer gets truncated.
523    pub fn incoming_payload_len(&self) -> u32 {
524        self.header.payloadlen
525    }
526
527    /// Message flags, with the same semantics as `msghdr.msg_flags`.
528    pub fn flags(&self) -> u32 {
529        self.header.flags
530    }
531}
532
533/// [CancelBuilder] constructs match criteria for request cancellation.
534///
535/// The [CancelBuilder] can be used to selectively cancel one or more requests
536/// by user_data, fd, fixed fd, or unconditionally.
537///
538/// ### Examples
539///
540/// ```
541/// use io_uring::types::{CancelBuilder, Fd, Fixed};
542///
543/// // Match all in-flight requests.
544/// CancelBuilder::any();
545///
546/// // Match a single request with user_data = 42.
547/// CancelBuilder::user_data(42);
548///
549/// // Match a single request with fd = 42.
550/// CancelBuilder::fd(Fd(42));
551///
552/// // Match a single request with fixed fd = 42.
553/// CancelBuilder::fd(Fixed(42));
554///
555/// // Match all in-flight requests with user_data = 42.
556/// CancelBuilder::user_data(42).all();
557/// ```
558#[derive(Debug)]
559pub struct CancelBuilder {
560    pub(crate) flags: AsyncCancelFlags,
561    pub(crate) user_data: Option<u64>,
562    pub(crate) fd: Option<sealed::Target>,
563}
564
565impl CancelBuilder {
566    /// Create a new [CancelBuilder] which will match any in-flight request.
567    ///
568    /// This will cancel every in-flight request in the ring.
569    ///
570    /// Async cancellation matching any requests is only available since 5.19.
571    pub const fn any() -> Self {
572        Self {
573            flags: AsyncCancelFlags::ANY,
574            user_data: None,
575            fd: None,
576        }
577    }
578
579    /// Create a new [CancelBuilder] which will match in-flight requests
580    /// with the given `user_data` value.
581    ///
582    /// The first request with the given `user_data` value will be canceled.
583    /// [CancelBuilder::all](#method.all) can be called to instead match every
584    /// request with the provided `user_data` value.
585    pub const fn user_data(user_data: u64) -> Self {
586        Self {
587            flags: AsyncCancelFlags::empty(),
588            user_data: Some(user_data),
589            fd: None,
590        }
591    }
592
593    /// Create a new [CancelBuilder] which will match in-flight requests with
594    /// the given `fd` value.
595    ///
596    /// The first request with the given `fd` value will be canceled. [CancelBuilder::all](#method.all)
597    /// can be called to instead match every request with the provided `fd` value.
598    ///
599    /// FD async cancellation is only available since 5.19.
600    pub fn fd(fd: impl sealed::UseFixed) -> Self {
601        let mut flags = AsyncCancelFlags::FD;
602        let target = fd.into();
603        if matches!(target, sealed::Target::Fixed(_)) {
604            flags.insert(AsyncCancelFlags::FD_FIXED);
605        }
606        Self {
607            flags,
608            user_data: None,
609            fd: Some(target),
610        }
611    }
612
613    /// Modify the [CancelBuilder] match criteria to match all in-flight requests
614    /// rather than just the first one.
615    ///
616    /// This has no effect when combined with [CancelBuilder::any](#method.any).
617    ///
618    /// Async cancellation matching all requests is only available since 5.19.
619    pub fn all(mut self) -> Self {
620        self.flags.insert(AsyncCancelFlags::ALL);
621        self
622    }
623
624    pub(crate) fn to_fd(&self) -> i32 {
625        self.fd
626            .as_ref()
627            .map(|target| match *target {
628                sealed::Target::Fd(fd) => fd,
629                sealed::Target::Fixed(idx) => idx as i32,
630            })
631            .unwrap_or(-1)
632    }
633}
634
635/// Wrapper around `futex_waitv` as used in [`futex_waitv` system
636/// call](https://www.kernel.org/doc/html/latest/userspace-api/futex2.html).
637#[derive(Default, Debug, Clone, Copy)]
638#[repr(transparent)]
639pub struct FutexWaitV(sys::futex_waitv);
640
641impl FutexWaitV {
642    pub const fn new() -> Self {
643        Self(sys::futex_waitv {
644            val: 0,
645            uaddr: 0,
646            flags: 0,
647            __reserved: 0,
648        })
649    }
650
651    pub const fn val(mut self, val: u64) -> Self {
652        self.0.val = val;
653        self
654    }
655
656    pub const fn uaddr(mut self, uaddr: u64) -> Self {
657        self.0.uaddr = uaddr;
658        self
659    }
660
661    pub const fn flags(mut self, flags: u32) -> Self {
662        self.0.flags = flags;
663        self
664    }
665}
666
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use crate::types::sealed::Target;

    use super::*;

    /// A `Duration` must map onto the seconds / nanoseconds fields unchanged.
    #[test]
    fn timespec_from_duration_converts_correctly() {
        let duration = Duration::new(2, 500);
        let Timespec(raw) = Timespec::from(duration);

        assert_eq!(raw.tv_sec as u64, duration.as_secs());
        assert_eq!(raw.tv_nsec as u32, duration.subsec_nanos());
    }

    /// Each constructor sets the expected flags, and `all()` adds `ALL` on top.
    #[test]
    fn test_cancel_builder_flags() {
        // Unconditional match.
        let any = CancelBuilder::any();
        assert_eq!(any.flags, AsyncCancelFlags::ANY);

        // Match by user_data.
        let by_user_data = CancelBuilder::user_data(42);
        assert_eq!(by_user_data.flags, AsyncCancelFlags::empty());
        assert_eq!(by_user_data.user_data, Some(42));
        assert!(by_user_data.fd.is_none());
        let by_user_data = by_user_data.all();
        assert_eq!(by_user_data.flags, AsyncCancelFlags::ALL);

        // Match by regular file descriptor.
        let by_fd = CancelBuilder::fd(Fd(42));
        assert_eq!(by_fd.flags, AsyncCancelFlags::FD);
        assert!(matches!(by_fd.fd, Some(Target::Fd(42))));
        assert!(by_fd.user_data.is_none());
        let by_fd = by_fd.all();
        assert_eq!(by_fd.flags, AsyncCancelFlags::FD | AsyncCancelFlags::ALL);

        // Match by fixed file descriptor.
        let by_fixed = CancelBuilder::fd(Fixed(42));
        assert_eq!(
            by_fixed.flags,
            AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED
        );
        assert!(matches!(by_fixed.fd, Some(Target::Fixed(42))));
        assert!(by_fixed.user_data.is_none());
        let by_fixed = by_fixed.all();
        assert_eq!(
            by_fixed.flags,
            AsyncCancelFlags::FD | AsyncCancelFlags::FD_FIXED | AsyncCancelFlags::ALL
        );
    }
}