kevy_uring/prep.rs
1//! SQE preparation helpers — `prep_*` queue one io_uring submission entry
2//! into the SQ ring. Split out of [`crate::ring`] so that file stays under
3//! the 500-LOC house rule. Each helper returns `false` if the SQ is full;
4//! the caller is expected to submit and retry.
5
6use core::ptr;
7
8use crate::ffi::{
9 IORING_ACCEPT_MULTISHOT, IORING_OP_ACCEPT, IORING_OP_NOP, IORING_OP_READ, IORING_OP_RECV,
10 IORING_OP_TIMEOUT, IORING_OP_WRITE, IORING_OP_WRITEV, IORING_RECV_MULTISHOT,
11 IOSQE_BUFFER_SELECT, IOSQE_FIXED_FILE, Iovec, SOCK_CLOEXEC, SOCK_NONBLOCK,
12};
13use crate::layout::{IoUringSqe, KernelTimespec};
14use crate::ring::IoUring;
15
16impl IoUring {
17 /// Queue a `read(fd)` of `len` bytes into `buf`, tagged with `user_data`.
18 /// Returns `false` if the SQ is full.
19 ///
20 /// # Safety
21 /// `buf` must point to `len` writable bytes and stay valid until the matching
22 /// completion is reaped.
23 pub unsafe fn prep_read(&mut self, fd: i32, buf: *mut u8, len: u32, user_data: u64) -> bool {
24 let Some(idx) = self.reserve() else {
25 return false;
26 };
27 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
28 unsafe {
29 ptr::write(
30 self.sqes_ptr().add(idx),
31 IoUringSqe::new(IORING_OP_READ, fd, buf as u64, len, user_data),
32 );
33 }
34 true
35 }
36
37 /// Queue a `write(fd)` of `len` bytes from `buf`, tagged with `user_data`.
38 /// Returns `false` if the SQ is full.
39 ///
40 /// # Safety
41 /// `buf` must point to `len` readable bytes and stay valid until the matching
42 /// completion is reaped.
43 pub unsafe fn prep_write(&mut self, fd: i32, buf: *const u8, len: u32, user_data: u64) -> bool {
44 let Some(idx) = self.reserve() else {
45 return false;
46 };
47 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
48 unsafe {
49 ptr::write(
50 self.sqes_ptr().add(idx),
51 IoUringSqe::new(IORING_OP_WRITE, fd, buf as u64, len, user_data),
52 );
53 }
54 true
55 }
56
57 /// Queue a `writev(fd, iov, iovcnt)`. L1 (2026-06-21): the reactor's
58 /// reply path uses this to fuse [header iovec, value-borrow iovec,
59 /// CRLF iovec] into one syscall — the per-GET memcpy of the value
60 /// into the conn output buffer is avoided.
61 ///
62 /// # Safety
63 /// `iov` must point to `iovcnt` valid `Iovec` entries, each `iov_base`
64 /// pointing to a readable byte range of length `iov_len`. The kernel
65 /// reads the iovec array AND each base asynchronously — both must
66 /// stay valid until the matching completion is reaped (the reactor
67 /// parks them in the conn's pending-writev state and drops on CQE).
68 pub unsafe fn prep_writev(
69 &mut self,
70 fd: i32,
71 iov: *const Iovec,
72 iovcnt: u32,
73 user_data: u64,
74 ) -> bool {
75 let Some(idx) = self.reserve() else {
76 return false;
77 };
78 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own
79 // alone. addr = iov pointer; len = iovcnt; off field (unused here)
80 // stays 0.
81 unsafe {
82 ptr::write(
83 self.sqes_ptr().add(idx),
84 IoUringSqe::new(IORING_OP_WRITEV, fd, iov as u64, iovcnt, user_data),
85 );
86 }
87 true
88 }
89
90 /// Queue a **multishot** `recv(fd)` that draws its destination buffer from
91 /// the provided-buffer group `bgid` (see [`IoUring::register_buf_ring`]): one
92 /// SQE re-fires a completion per arrival, the kernel picking + reporting a
93 /// buffer id each time, until it terminates (error / `ENOBUFS`, signalled by
94 /// [`crate::Completion::has_more`] returning `false`). No per-recv SQE, no read
95 /// buffer to keep alive. Returns `false` if the SQ is full.
96 pub fn prep_recv_multishot(&mut self, fd: i32, bgid: u16, user_data: u64) -> bool {
97 let Some(idx) = self.reserve() else {
98 return false;
99 };
100 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
101 unsafe {
102 let sqe = self.sqes_ptr().add(idx);
103 // addr/len 0: the buffer comes from the group, not from us.
104 ptr::write(sqe, IoUringSqe::new(IORING_OP_RECV, fd, 0, 0, user_data));
105 (*sqe).ioprio = IORING_RECV_MULTISHOT;
106 (*sqe).flags = IOSQE_BUFFER_SELECT;
107 // `buf_index` aliases `buf_group` in the kernel ABI.
108 (*sqe).buf_index = bgid;
109 }
110 true
111 }
112
113 /// Same as [`Self::prep_write`] but addresses the destination by
114 /// registered-files **slot index** instead of raw fd. Sets
115 /// `IOSQE_FIXED_FILE`; the kernel skips its per-op `fget`/`fput`. Caller
116 /// must have populated `slot` via
117 /// [`crate::IoUring::update_file_slot`].
118 ///
119 /// # Safety
120 /// Same as `prep_write`.
121 pub unsafe fn prep_write_fixed(
122 &mut self,
123 slot: u32,
124 buf: *const u8,
125 len: u32,
126 user_data: u64,
127 ) -> bool {
128 let Some(idx) = self.reserve() else {
129 return false;
130 };
131 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
132 unsafe {
133 let sqe = self.sqes_ptr().add(idx);
134 ptr::write(
135 sqe,
136 IoUringSqe::new(IORING_OP_WRITE, slot as i32, buf as u64, len, user_data),
137 );
138 (*sqe).flags = IOSQE_FIXED_FILE;
139 }
140 true
141 }
142
143 /// Same as [`Self::prep_recv_multishot`] but addresses the source by
144 /// registered-files **slot index** instead of raw fd. Sets
145 /// `IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT`; the kernel skips its
146 /// per-op `fget`/`fput`. Caller must have populated `slot` via
147 /// [`crate::IoUring::update_file_slot`].
148 pub fn prep_recv_multishot_fixed(
149 &mut self,
150 slot: u32,
151 bgid: u16,
152 user_data: u64,
153 ) -> bool {
154 let Some(idx) = self.reserve() else {
155 return false;
156 };
157 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
158 unsafe {
159 let sqe = self.sqes_ptr().add(idx);
160 ptr::write(
161 sqe,
162 IoUringSqe::new(IORING_OP_RECV, slot as i32, 0, 0, user_data),
163 );
164 (*sqe).ioprio = IORING_RECV_MULTISHOT;
165 (*sqe).flags = IOSQE_BUFFER_SELECT | IOSQE_FIXED_FILE;
166 (*sqe).buf_index = bgid;
167 }
168 true
169 }
170
171 /// Queue an `accept` on `listen_fd`; the accepted fd arrives as the
172 /// completion's `res` (already `O_NONBLOCK | O_CLOEXEC`). Returns `false` if
173 /// the SQ is full.
174 pub fn prep_accept(&mut self, listen_fd: i32, user_data: u64) -> bool {
175 let Some(idx) = self.reserve() else {
176 return false;
177 };
178 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
179 unsafe {
180 let sqe = self.sqes_ptr().add(idx);
181 ptr::write(
182 sqe,
183 IoUringSqe::new(IORING_OP_ACCEPT, listen_fd, 0, 0, user_data),
184 );
185 (*sqe).rw_flags = SOCK_NONBLOCK | SOCK_CLOEXEC;
186 }
187 true
188 }
189
190 /// Queue a **multishot** accept on `listen_fd` (Linux 5.19+). The kernel
191 /// keeps one SQE armed across many connections — each new fd arrives as
192 /// its own CQE with `IORING_CQE_F_MORE` set in `flags` while still armed.
193 /// When `F_MORE` is clear the multishot has terminated and userland must
194 /// re-arm via this fn (or fall back to [`Self::prep_accept`]). Caller
195 /// must keep `user_data` stable across the run of one multishot — each
196 /// CQE replays the same tag.
197 ///
198 /// B4 (2026-06-20): replaces the one-SQE-per-accept call site in
199 /// `kevy_rt::uring_reactor`. At -c1 (one persistent conn) zero
200 /// difference; under high-conn-churn workloads cuts an SQE + an
201 /// `arm_conns`-loop trip per accept.
202 pub fn prep_accept_multishot(&mut self, listen_fd: i32, user_data: u64) -> bool {
203 let Some(idx) = self.reserve() else {
204 return false;
205 };
206 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
207 unsafe {
208 let sqe = self.sqes_ptr().add(idx);
209 ptr::write(
210 sqe,
211 IoUringSqe::new(IORING_OP_ACCEPT, listen_fd, 0, 0, user_data),
212 );
213 (*sqe).rw_flags = SOCK_NONBLOCK | SOCK_CLOEXEC;
214 (*sqe).ioprio = IORING_ACCEPT_MULTISHOT;
215 }
216 true
217 }
218
219 /// Queue a relative timeout: the completion (res = `-ETIME`) arrives once
220 /// `ts` elapses, or earlier with res = 0 / `-ECANCELED` if the ring shuts
221 /// down. Bounds a blocking [`IoUring::submit_and_wait`] the way a poller's
222 /// wait-timeout would. Returns `false` if the SQ is full.
223 ///
224 /// # Safety
225 /// `ts` must stay valid (not moved or dropped) until the matching
226 /// completion is reaped — the kernel reads it asynchronously.
227 pub unsafe fn prep_timeout(&mut self, ts: *const KernelTimespec, user_data: u64) -> bool {
228 let Some(idx) = self.reserve() else {
229 return false;
230 };
231 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
232 unsafe {
233 let sqe = self.sqes_ptr().add(idx);
234 // addr = timespec ptr, len = 1 (one timespec), off = 0 (pure
235 // timeout — no completion-count trigger), rw_flags = 0 (relative).
236 ptr::write(
237 sqe,
238 IoUringSqe::new(IORING_OP_TIMEOUT, -1, ts as u64, 1, user_data),
239 );
240 }
241 true
242 }
243
244 /// Queue a no-op tagged with `user_data` (used to prove the round-trip).
245 /// Returns `false` if the SQ is full.
246 pub fn prep_nop(&mut self, user_data: u64) -> bool {
247 let Some(idx) = self.reserve() else {
248 return false;
249 };
250 // SAFETY: `idx` is a freshly reserved, in-bounds SQE slot we own alone.
251 unsafe {
252 ptr::write(
253 self.sqes_ptr().add(idx),
254 IoUringSqe::new(IORING_OP_NOP, -1, 0, 0, user_data),
255 );
256 }
257 true
258 }
259}