lx/syscalls/
io_uring.rs

1use core::{
2    mem,
3    ptr,
4};
5
6use super::abi::*;
7use crate::{
8    result_from_value,
9    sigset_t,
10    unit_result_from_value,
11    AsRawFd,
12    OwnedFd,
13    RawFd,
14};
15
// Submission queue entry (SQE) flags; each constant occupies its own bit.
pub const IOSQE_FIXED_FILE: u8 = 1 << 0;
pub const IOSQE_IO_DRAIN: u8 = 1 << 1;
pub const IOSQE_IO_LINK: u8 = 1 << 2;
pub const IOSQE_IO_HARDLINK: u8 = 1 << 3;
pub const IOSQE_ASYNC: u8 = 1 << 4;
pub const IOSQE_BUFFER_SELECT: u8 = 1 << 5;
23
// Operation codes. The values are consecutive, starting at 0, and are kept in decimal so the
// numbering is easy to follow.
pub const IORING_OP_NOP: u8 = 0;
pub const IORING_OP_READV: u8 = 1;
pub const IORING_OP_WRITEV: u8 = 2;
pub const IORING_OP_FSYNC: u8 = 3;
pub const IORING_OP_READ_FIXED: u8 = 4;
pub const IORING_OP_WRITE_FIXED: u8 = 5;
pub const IORING_OP_POLL_ADD: u8 = 6;
pub const IORING_OP_POLL_REMOVE: u8 = 7;
pub const IORING_OP_SYNC_FILE_RANGE: u8 = 8;
pub const IORING_OP_SENDMSG: u8 = 9;
pub const IORING_OP_RECVMSG: u8 = 10;
pub const IORING_OP_TIMEOUT: u8 = 11;
pub const IORING_OP_TIMEOUT_REMOVE: u8 = 12;
pub const IORING_OP_ACCEPT: u8 = 13;
pub const IORING_OP_ASYNC_CANCEL: u8 = 14;
pub const IORING_OP_LINK_TIMEOUT: u8 = 15;
pub const IORING_OP_CONNECT: u8 = 16;
pub const IORING_OP_FALLOCATE: u8 = 17;
pub const IORING_OP_OPENAT: u8 = 18;
pub const IORING_OP_CLOSE: u8 = 19;
pub const IORING_OP_FILES_UPDATE: u8 = 20;
pub const IORING_OP_STATX: u8 = 21;
pub const IORING_OP_READ: u8 = 22;
pub const IORING_OP_WRITE: u8 = 23;
pub const IORING_OP_FADVISE: u8 = 24;
pub const IORING_OP_MADVISE: u8 = 25;
pub const IORING_OP_SEND: u8 = 26;
pub const IORING_OP_RECV: u8 = 27;
pub const IORING_OP_OPENAT2: u8 = 28;
pub const IORING_OP_EPOLL_CTL: u8 = 29;
pub const IORING_OP_SPLICE: u8 = 30;
pub const IORING_OP_PROVIDE_BUFFERS: u8 = 31;
pub const IORING_OP_REMOVE_BUFFERS: u8 = 32;
pub const IORING_OP_TEE: u8 = 33;
pub const IORING_OP_SHUTDOWN: u8 = 34;
pub const IORING_OP_RENAMEAT: u8 = 35;
pub const IORING_OP_UNLINKAT: u8 = 36;
pub const IORING_OP_MKDIRAT: u8 = 37;
pub const IORING_OP_SYMLINKAT: u8 = 38;
pub const IORING_OP_LINKAT: u8 = 39;
65
// Fsync flags.
pub const IORING_FSYNC_DATASYNC: u32 = 1 << 0;

// Timeout flags; each constant occupies its own bit.
pub const IORING_TIMEOUT_ABS: u32 = 1 << 0;
pub const IORING_TIMEOUT_UPDATE: u32 = 1 << 1;
pub const IORING_TIMEOUT_BOOTTIME: u32 = 1 << 2;
pub const IORING_TIMEOUT_REALTIME: u32 = 1 << 3;
pub const IORING_LINK_TIMEOUT_UPDATE: u32 = 1 << 4;
// Masks built from the bits above: the two clock-selection bits, and the two update bits.
pub const IORING_TIMEOUT_CLOCK_MASK: u32 = IORING_TIMEOUT_REALTIME | IORING_TIMEOUT_BOOTTIME;
pub const IORING_TIMEOUT_UPDATE_MASK: u32 = IORING_LINK_TIMEOUT_UPDATE | IORING_TIMEOUT_UPDATE;
77
// Splice flags. This one uses the most significant bit of the 32-bit flags word.
pub const SPLICE_F_FD_IN_FIXED: u32 = 1 << 31;

// Poll flags; each constant occupies its own bit.
pub const IORING_POLL_ADD_MULTI: u32 = 1 << 0;
pub const IORING_POLL_UPDATE_EVENTS: u32 = 1 << 1;
pub const IORING_POLL_UPDATE_USER_DATA: u32 = 1 << 2;
85
// Completion queue entry (CQE) flags.
pub const IORING_CQE_F_BUFFER: u32 = 1 << 0;
pub const IORING_CQE_F_MORE: u32 = 1 << 1;

// Shift amount associated with the CQE buffer flag.
pub const IORING_CQE_BUFFER_SHIFT: u8 = 16;

// Offsets identifying the submission ring, the completion ring and the SQE array.
pub const IORING_OFF_SQ_RING: usize = 0;
pub const IORING_OFF_CQ_RING: usize = 0x0800_0000;
pub const IORING_OFF_SQES: usize = 0x1000_0000;
95
// Submission ring flags.
pub const IORING_SQ_NEED_WAKEUP: u32 = 0x1;
pub const IORING_SQ_CQ_OVERFLOW: u32 = 0x2;

// Completion ring flags. These are numbered independently of the `IORING_SQ_*` flags above:
// the kernel's uapi header defines `IORING_CQ_EVENTFD_DISABLED` as `1U << 0`.
pub const IORING_CQ_EVENTFD_DISABLED: u32 = 0x1;

// Flags accepted by `io_uring_enter`.
pub const IORING_ENTER_GETEVENTS: u32 = 0x1;
pub const IORING_ENTER_SQ_WAKEUP: u32 = 0x2;
pub const IORING_ENTER_SQ_WAIT: u32 = 0x4;
pub const IORING_ENTER_EXT_ARG: u32 = 0x8;
105
// Setup flags (the `flags` field of `io_uring_params`); each constant occupies its own bit.
pub const IORING_SETUP_IOPOLL: u32 = 1 << 0;
pub const IORING_SETUP_SQPOLL: u32 = 1 << 1;
pub const IORING_SETUP_SQ_AFF: u32 = 1 << 2;
pub const IORING_SETUP_CQSIZE: u32 = 1 << 3;
pub const IORING_SETUP_CLAMP: u32 = 1 << 4;
pub const IORING_SETUP_ATTACH_WQ: u32 = 1 << 5;
pub const IORING_SETUP_R_DISABLED: u32 = 1 << 6;
pub const IORING_SETUP_SUBMIT_ALL: u32 = 1 << 7;
pub const IORING_SETUP_COOP_TASKRUN: u32 = 1 << 8;
pub const IORING_SETUP_TASKRUN_FLAG: u32 = 1 << 9;
pub const IORING_SETUP_SQE128: u32 = 1 << 10;
pub const IORING_SETUP_CQE32: u32 = 1 << 11;
pub const IORING_SETUP_SINGLE_ISSUER: u32 = 1 << 12;
pub const IORING_SETUP_DEFER_TASKRUN: u32 = 1 << 13;
pub const IORING_SETUP_NO_MMAP: u32 = 1 << 14;
pub const IORING_SETUP_REGISTERED_FD_ONLY: u32 = 1 << 15;
123
// Feature bits reported by the kernel (the `features` field of `io_uring_params`).
pub const IORING_FEAT_SINGLE_MMAP: u32 = 1 << 0;
pub const IORING_FEAT_NODROP: u32 = 1 << 1;
pub const IORING_FEAT_SUBMIT_STABLE: u32 = 1 << 2;
pub const IORING_FEAT_RW_CUR_POS: u32 = 1 << 3;
pub const IORING_FEAT_CUR_PERSONALITY: u32 = 1 << 4;
pub const IORING_FEAT_FAST_POLL: u32 = 1 << 5;
pub const IORING_FEAT_POLL_32BITS: u32 = 1 << 6;
pub const IORING_FEAT_SQPOLL_NONFIXED: u32 = 1 << 7;
pub const IORING_FEAT_EXT_ARG: u32 = 1 << 8;
pub const IORING_FEAT_NATIVE_WORKERS: u32 = 1 << 9;
pub const IORING_FEAT_RSRC_TAGS: u32 = 1 << 10;
136
// Opcodes for registering and unregistering resources. The values are consecutive, starting
// at 0.
pub const IORING_REGISTER_BUFFERS: u8 = 0;
pub const IORING_UNREGISTER_BUFFERS: u8 = 1;
pub const IORING_REGISTER_FILES: u8 = 2;
pub const IORING_UNREGISTER_FILES: u8 = 3;
pub const IORING_REGISTER_EVENTFD: u8 = 4;
pub const IORING_UNREGISTER_EVENTFD: u8 = 5;
pub const IORING_REGISTER_FILES_UPDATE: u8 = 6;
pub const IORING_REGISTER_EVENTFD_ASYNC: u8 = 7;
pub const IORING_REGISTER_PROBE: u8 = 8;
pub const IORING_REGISTER_PERSONALITY: u8 = 9;
pub const IORING_UNREGISTER_PERSONALITY: u8 = 10;
pub const IORING_REGISTER_RESTRICTIONS: u8 = 11;
pub const IORING_REGISTER_ENABLE_RINGS: u8 = 12;
pub const IORING_REGISTER_FILES2: u8 = 13;
pub const IORING_REGISTER_FILES_UPDATE2: u8 = 14;
pub const IORING_REGISTER_BUFFERS2: u8 = 15;
pub const IORING_REGISTER_BUFFERS_UPDATE: u8 = 16;
pub const IORING_REGISTER_IOWQ_AFF: u8 = 17;
pub const IORING_UNREGISTER_IOWQ_AFF: u8 = 18;
pub const IORING_REGISTER_IOWQ_MAX_WORKERS: u8 = 19;
158
// Flag for the `flags` field of a probe entry (`io_uring_probe_op`).
pub const IO_URING_OP_SUPPORTED: u16 = 1 << 0;

// Restriction kinds; the values are consecutive, starting at 0.
pub const IORING_RESTRICTION_REGISTER_OP: u8 = 0;
pub const IORING_RESTRICTION_SQE_OP: u8 = 1;
pub const IORING_RESTRICTION_SQE_FLAGS_ALLOWED: u8 = 2;
pub const IORING_RESTRICTION_SQE_FLAGS_REQUIRED: u8 = 3;
165
/// Submission queue entry.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_sqe {
    /// Operation code (presumably one of the `IORING_OP_*` constants).
    pub opcode: u8,
    /// Entry flags (presumably a combination of the `IOSQE_*` constants).
    pub flags: u8,
    pub ioprio: u16,
    /// Raw file descriptor the operation applies to.
    pub fd: RawFd,
    pub off: u64,
    pub addr: u64,
    pub len: u32,
    /// Operation-specific flags.
    pub op_flags: u32,
    pub user_data: u64,
    /// Shared slot: holds either a buffer index or a buffer group, as the name indicates.
    pub buf_index_or_group: u16,
    pub personality: u16,
    /// Shared slot: holds either a splice input descriptor or a file index, as the name
    /// indicates.
    pub splice_fd_in_or_file_index: u32,
    // Trailing 16 bytes, not exposed publicly (presumably reserved and expected to be zero).
    zero: [u64; 2],
}
184
/// Completion queue entry.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_cqe {
    pub user_data: u64,
    /// Signed result of the operation.
    pub ret: i32,
    /// Completion flags (presumably a combination of the `IORING_CQE_F_*` constants).
    pub flags: u32,
}
193
/// Offsets describing the submission ring; embedded in `io_uring_params` as `sq_off`.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_sqring_offsets {
    pub head: u32,
    pub tail: u32,
    pub ring_mask: u32,
    pub ring_entries: u32,
    pub flags: u32,
    pub dropped: u32,
    pub array: u32,
    // Trailing 12 bytes, not exposed publicly (presumably reserved).
    zero: [u32; 3],
}
207
/// Offsets describing the completion ring; embedded in `io_uring_params` as `cq_off`.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_cqring_offsets {
    pub head: u32,
    pub tail: u32,
    pub ring_mask: u32,
    pub ring_entries: u32,
    pub overflow: u32,
    pub cqes: u32,
    pub flags: u32,
    // Trailing 12 bytes, not exposed publicly (presumably reserved).
    zero: [u32; 3],
}
221
/// Argument block for resource registration.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_rsrc_register {
    pub nr: u32,
    // Two private slots (4 + 8 bytes), not exposed publicly (presumably reserved).
    zero: u32,
    zero2: u64,
    /// Stored as a 64-bit integer (presumably a user-space address).
    pub data: u64,
    /// Stored as a 64-bit integer (presumably a user-space address).
    pub tags: u64,
}
232
/// Argument block for updating registered resources.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_rsrc_update {
    pub offset: u32,
    // Private 4-byte slot, not exposed publicly (presumably reserved).
    zero: u32,
    /// Stored as a 64-bit integer (presumably a user-space address).
    pub data: u64,
}
241
/// Extended argument block for updating registered resources; compared with
/// `io_uring_rsrc_update` it additionally carries `tags` and `nr`.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_rsrc_update2 {
    pub offset: u32,
    // Private 4-byte slot, not exposed publicly (presumably reserved).
    zero: u32,
    /// Stored as a 64-bit integer (presumably a user-space address).
    pub data: u64,
    /// Stored as a 64-bit integer (presumably a user-space address).
    pub tags: u64,
    pub nr: u32,
    // Private 4-byte slot, not exposed publicly (presumably reserved).
    zero2: u32,
}
253
254#[allow(non_camel_case_types)]
255#[repr(C)]
256#[derive(Clone, Copy)]
257pub struct io_uring_params {
258    pub sq_entries: u32,
259    pub cq_entries: u32,
260    pub flags: u32,
261    pub sq_thread_cpu: u32,
262    pub sq_thread_idle: u32,
263    pub features: u32,
264    pub wq_fd: u32,
265    zero: [u32; 3],
266    pub sq_off: io_sqring_offsets,
267    pub cq_off: io_cqring_offsets,
268}
269
/// One entry of the `ops` array of `io_uring_probe`.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_probe_op {
    /// Operation code this entry describes.
    pub op: u8,
    // Private 1-byte slot, not exposed publicly (presumably reserved).
    zero: u8,
    /// Entry flags (presumably `IO_URING_OP_SUPPORTED`, which has the same width).
    pub flags: u16,
    // Private 4-byte slot, not exposed publicly (presumably reserved).
    zero2: u32,
}
279
280#[allow(non_camel_case_types)]
281#[repr(C)]
282pub struct io_uring_probe {
283    pub last_op: u8,
284    pub ops_len: u8,
285    zero: u16,
286    zero2: [u32; 3],
287    pub ops: [io_uring_probe_op],
288}
289
/// A single restriction entry.
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_restriction {
    /// Restriction kind (presumably one of the `IORING_RESTRICTION_*` constants, widened to
    /// `u16`).
    pub opcode: u16,
    /// Shared slot: holds either an opcode or a set of flags, as the name indicates.
    pub op_or_flags: u8,
    // Private slots (1 + 12 bytes), not exposed publicly (presumably reserved).
    zero: u8,
    zero2: [u32; 3],
}
299
/// Extended argument block for waiting on events (presumably what `io_uring_enter` expects as
/// `arg` when `IORING_ENTER_EXT_ARG` is set).
///
/// The layout is `repr(C)`; field order and widths must not be changed.
#[derive(Clone, Copy)]
#[repr(C)]
#[allow(non_camel_case_types)]
pub struct io_uring_getevents_arg {
    /// Stored as a 64-bit integer (presumably the address of a signal mask).
    pub sigmask: u64,
    /// Size that goes with `sigmask`.
    pub sigmask_sz: u32,
    // Private 4-byte slot, not exposed publicly (presumably padding).
    zero: u32,
    /// Stored as a 64-bit integer (presumably the address of a timespec).
    pub ts: u64,
}
309
310/// Registers a resource on the io_uring instance so that it can be used in submition entries.
311///
312/// # Safety
313///
314/// `args` must contain valid data. What this means depends on the `opcode`.
315/// io_uring documentation or implementation.
316#[inline]
317pub unsafe fn io_uring_register(
318    fd: &impl AsRawFd,
319    opcode: u32,
320    args: &[*const u8],
321) -> crate::Result<()> {
322    let ret = syscall_4(
323        427,
324        fd.as_raw_fd() as usize,
325        opcode as usize,
326        args.as_ptr() as usize,
327        args.len(),
328    ) as i32;
329    unit_result_from_value(ret)
330}
331
332#[inline]
333pub fn io_uring_setup(entries: u32, params: &mut io_uring_params) -> crate::Result<OwnedFd> {
334    let ret = unsafe {
335        syscall_2(
336            425,
337            entries as usize,
338            params as *mut io_uring_params as usize,
339        ) as i32
340    };
341    result_from_value(ret).map(OwnedFd::new)
342}
343
344/// Tell the kernel about new pending submission entries, and/or wait for new completion entries.
345///
346/// # Safety
347///
348/// If `arg` is not `ptr::null()`, then `arg_size` must contain the size of the value it is
349/// pointing to, and the value must be valid.
350#[inline]
351pub unsafe fn io_uring_enter(
352    fd_or_idx: u32,
353    to_submit: u32,
354    min_complete: u32,
355    flags: u32,
356    arg: *const u8,
357    arg_size: usize,
358) -> crate::Result<i32> {
359    let ret = syscall_6(
360        426,
361        fd_or_idx as usize,
362        to_submit as usize,
363        min_complete as usize,
364        flags as usize,
365        arg as usize,
366        arg_size,
367    ) as i32;
368    result_from_value(ret)
369}
370
371#[inline]
372pub fn io_uring_enter_getevents(
373    fd_or_idx: u32,
374    to_submit: u32,
375    min_complete: u32,
376) -> crate::Result<i32> {
377    unsafe {
378        io_uring_enter(
379            fd_or_idx,
380            to_submit,
381            min_complete,
382            IORING_ENTER_GETEVENTS,
383            ptr::null(),
384            mem::size_of::<sigset_t>(),
385        )
386    }
387}