xdp_socket/socket.rs
1//! # XDP Socket Implementation
2//!
3//! ## Purpose
4//!
5//! This file implements the `Socket` struct, which provides a high-level interface for
6//! interacting with XDP sockets. It supports both sending (TX) and receiving (RX) of
7//! packets with high performance through zero-copy data transfers.
8//!
9//! ## How it works
10//!
11//! The `Socket` utilizes two main components for its operation: a UMEM (Userspace Memory)
12//! region and associated rings for communication with the kernel. The UMEM is a memory-mapped
13//! area shared between the userspace application and the kernel, which allows for zero-copy
14//! packet processing.
15//!
16//! - For sending packets (TX), the application writes packet data directly into frames within
17//! the UMEM and then pushes descriptors to the TX ring, signaling the kernel to send them.
18//! - For receiving packets (RX), the application provides the kernel with descriptors pointing
19//! to free frames in the UMEM via the Fill ring. The kernel writes incoming packet data
20//! into these frames and notifies the application through the RX ring.
21//!
22//! ## Main components
23//!
24//! - `Socket<const t:_Direction>`: The primary struct representing an XDP socket. It is
25//! generic over the direction (TX or RX) to provide a type-safe API for each use case.
26//! - `Ring<T>`: A generic ring buffer implementation that is used for the TX/RX rings and
27//! the Fill/Completion rings for UMEM.
28//! - `Inner`: A struct that holds the owned file descriptor for the XDP socket and the
29//! memory-mapped UMEM region.
30//! - `TxSocket` and `RxSocket`: Type aliases for `Socket<true>` and `Socket<false>`
31//! respectively, providing a more intuitive API for users.
32
33#![allow(private_interfaces)]
34#![allow(private_bounds)]
35#![allow(non_upper_case_globals)]
36
37use crate::mmap::OwnedMmap;
38use crate::ring::{Ring, XdpDesc};
39use std::os::fd::{AsRawFd as _, OwnedFd};
40use std::{io, ptr};
41use std::fmt::Display;
42use std::sync::Arc;
43use std::time::Duration;
44
45/// A high-level interface for an AF_XDP socket.
46///
47/// This struct is generic over the `_Direction` const parameter, which determines
48/// whether the socket is for sending (`_TX`) or receiving (`_RX`).
49pub struct Socket<const t: _Direction> {
50 /// The inner shared state, including the file descriptor and UMEM.
51 pub(crate) _inner: Option<Arc<Inner>>,
52 /// The primary ring for sending (TX) or receiving (RX) descriptors.
53 pub(crate) x_ring: Ring<XdpDesc>,
54 /// The UMEM-associated ring: Completion Ring for TX, Fill Ring for RX.
55 pub(crate) u_ring: Ring<u64>,
56 /// The number of available descriptors in the `x_ring`.
57 pub(crate) available: u32,
58 /// The cached producer index for the `x_ring`.
59 pub(crate) producer: u32,
60 /// The cached consumer index for the `x_ring`.
61 pub(crate) consumer: u32,
62 /// A raw pointer to the start of the UMEM frames area.
63 pub(crate) frames: *mut u8,
64 /// -
65 pub(crate) raw_fd: libc::c_int,
66}
67
/// Errors reported by ring operations.
///
/// The type implements [`std::error::Error`], so it can be boxed as
/// `Box<dyn std::error::Error>` and chained; for [`RingError::Io`] the wrapped
/// [`io::Error`] is exposed through `source()`.
#[derive(Debug)]
pub enum RingError {
    /// The ring is full, and no more descriptors can be produced.
    RingFull,
    /// The ring is empty, and no descriptors can be consumed.
    RingEmpty,
    /// Not enough descriptors or frames are available for the requested operation.
    NotAvailable,
    /// An invalid index was used to access a ring descriptor.
    InvalidIndex,
    /// The provided data length exceeds the available space in a UMEM frame.
    InvalidLength,
    /// An underlying I/O error occurred.
    Io(io::Error),
}

impl Display for RingError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RingError::RingFull => f.write_str("Ring is full"),
            RingError::RingEmpty => f.write_str("Ring is empty"),
            RingError::NotAvailable => f.write_str("Not enough available frames"),
            RingError::InvalidIndex => f.write_str("Invalid index for ring access"),
            RingError::InvalidLength => f.write_str("Invalid length for ring access"),
            RingError::Io(e) => write!(f, "I/O error: {e}"),
        }
    }
}

impl std::error::Error for RingError {
    /// Returns the wrapped [`io::Error`] for [`RingError::Io`], `None` otherwise.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            RingError::Io(e) => Some(e),
            RingError::RingFull
            | RingError::RingEmpty
            | RingError::NotAvailable
            | RingError::InvalidIndex
            | RingError::InvalidLength => None,
        }
    }
}
97
98
99impl<const t: _Direction> Socket<t>
100where
101 Socket<t>: Seek_<t> + Commit_<t> + Send,
102{
103 /// Constructs a new `Socket`.
104 ///
105 /// This function initializes a socket for either sending or receiving based on the
106 /// generic const `t`. For TX sockets, it pre-fills the TX ring with descriptors
107 /// pointing to UMEM frames. For RX sockets, it pre-fills the Fill ring to provide
108 /// the kernel with available frames for incoming packets.
109 ///
110 /// # Arguments
111 ///
112 /// * `inner` - The shared inner socket state (file descriptor, UMEM).
113 /// * `x_ring` - The TX or RX ring.
114 /// * `u_ring` - The Completion or Fill ring.
115 /// * `skip_frames` - The number of frames to skip at the start of the UMEM.
116 pub(crate) fn new(
117 inner: Option<Arc<Inner>>,
118 mut x_ring: Ring<XdpDesc>,
119 mut u_ring: Ring<u64>,
120 skip_frames: usize,
121 ) -> Self {
122 if let Some(inner) = inner {
123 match t {
124 _TX => {
125 // all frames available for sending packets
126 x_ring.fill(skip_frames as u32);
127 }
128 _RX => {
129 // all frames available for receiving packets
130 u_ring.fill(skip_frames as u32);
131 u_ring.update_producer(u_ring.len as u32);
132 }
133 };
134 let frames = inner.umem.0 as *mut u8;
135 let raw_fd = inner.fd.as_raw_fd();
136 Self {
137 frames,
138 available: x_ring.len as u32,
139 producer: 0,
140 consumer: 0,
141 raw_fd,
142 _inner: Some(inner),
143 x_ring,
144 u_ring,
145 }
146 } else {
147 Self::default()
148 }
149 }
150
151 /// Waits for the socket to become ready for I/O, blocking until an event occurs.
152 ///
153 /// This function uses `poll` to wait for the socket's file descriptor to become
154 /// ready. For a `TxSocket`, it waits for `POLLOUT` (writable). For an `RxSocket`,
155 /// it waits for `POLLIN` (readable).
156 ///
157 /// # Arguments
158 ///
159 /// * `_timeout` - An optional timeout. If `None`, it blocks indefinitely.
160 ///
161 /// # Returns
162 ///
163 /// An `io::Result` indicating success or failure.
164 pub fn poll_wait(&self, _timeout: Option<Duration>) -> Result<(), io::Error> {
165 self.kick()?;
166 let mask = match t {
167 _TX => libc::POLLOUT,
168 _RX => libc::POLLIN,
169 };
170 unsafe {
171 loop {
172 let mut fds = [libc::pollfd {
173 events: mask,
174 revents: 0,
175 fd: self.raw_fd,
176 }];
177 if 0 > libc::poll(fds.as_mut_ptr(), 1, -1) {
178 //..
179 } else if (fds[0].revents & mask) != 0 {
180 break;
181 }
182 }
183 }
184 Ok(())
185 }
186
187 /// Ensures that at least one descriptor is available for the next operation and
188 /// returns the total number of available descriptors.
189 ///
190 /// For a `TxSocket`, this may involve reclaiming completed descriptors from the
191 /// Completion Ring. For an `RxSocket`, this checks for newly received packets.
192 ///
193 /// # Returns
194 ///
195 /// A `Result` containing the total number of available descriptors, or a
196 /// `RingError` if the operation fails.
197 #[inline]
198 pub fn seek(&mut self) -> Result<usize, RingError> {
199 self.seek_(1)
200 }
201
202 /// Ensures that at least `count` descriptors are available for the next operation
203 /// and returns the total number of available descriptors.
204 ///
205 /// For a `TxSocket`, this may involve reclaiming completed descriptors from the
206 /// Completion Ring. For an `RxSocket`, this checks for newly received packets.
207 ///
208 /// # Arguments
209 ///
210 /// * `count` - The desired number of available descriptors.
211 ///
212 /// # Returns
213 ///
214 /// A `Result` containing the total number of available descriptors, or a
215 /// `RingError` if the operation fails.
216 #[inline]
217 pub fn seek_n(&mut self, count: usize) -> Result<usize, RingError> {
218 self.seek_(count)
219 }
220
221 /// Commits one descriptor, making it available to the kernel.
222 ///
223 /// For a `TxSocket`, this signals to the kernel that a packet written to the
224 /// corresponding UMEM frame is ready to be sent.
225 ///
226 /// For an `RxSocket`, this returns a UMEM frame to the kernel's Fill Ring after
227 /// the application has finished processing the received packet, making the frame
228 /// available for new packets.
229 ///
230 /// # Returns
231 ///
232 /// `Ok(())` on success, or a `RingError` on failure.
233 #[inline]
234 pub fn commit(&mut self) -> Result<(), RingError> {
235 self.commit_(1)
236 }
237
238 /// Commits `n` descriptors, making them available to the kernel.
239 ///
240 /// For a `TxSocket`, this signals to the kernel that `n` packets are ready to be sent.
241 /// For an `RxSocket`, this returns `n` UMEM frames to the kernel's Fill Ring.
242 ///
243 /// # Arguments
244 ///
245 /// * `n` - The number of descriptors to commit.
246 ///
247 /// # Returns
248 ///
249 /// `Ok(())` on success, or a `RingError` on failure.
250 #[inline]
251 pub fn commit_n(&mut self, n: usize) -> Result<(), RingError> {
252 self.commit_(n)
253 }
254
255
256 /// Returns the size of a single frame in the UMEM.
257 ///
258 /// # Returns
259 ///
260 /// The size of a single frame in the UMEM in bytes.
261 #[inline]
262 pub fn frame_size(&self) -> usize {
263 self.x_ring.frame_size() as usize
264 }
265}
266
267// socket refers to shared mapped memory owned by _inner and rings
268// so all pointers can be safely send over threads
269// until mapped memory is alive
270unsafe impl<const t:_Direction> Send for Socket<t> {}
271
272
/// Direction selector for a socket, encoded as a boolean: `true` means transmit
/// (TX) and `false` means receive (RX).
pub type _Direction = bool;

/// The transmit (TX) direction.
pub const _TX: _Direction = true;

/// The receive (RX) direction, i.e. the opposite of [`_TX`].
pub const _RX: _Direction = !_TX;
281
282/// A type alias for a socket configured for sending packets.
283pub type TxSocket = Socket<_TX>;
284
285/// A type alias for a socket configured for receiving packets.
286pub type RxSocket = Socket<_RX>;
287
288impl<const t: _Direction> Default for Socket<t> {
289 fn default() -> Self {
290 Self {
291 _inner: None,
292 x_ring: Default::default(),
293 u_ring: Default::default(),
294 available: 0,
295 producer: 0,
296 consumer: 0,
297 frames: ptr::null_mut(),
298 raw_fd: 0
299 }
300 }
301}
302
303/// A trait for direction-specific seeking logic (TX vs. RX).
304pub(crate) trait Seek_<const t: _Direction> {
305 fn seek_(&mut self, count: usize) -> Result<usize, RingError>;
306}
307
308pub(crate) trait Commit_<const t: _Direction> {
309 fn commit_(&mut self, count: usize) -> Result<(), RingError>;
310}
311
312/// Holds the owned components of an XDP socket that can be shared.
313pub(crate) struct Inner {
314 /// The memory-mapped UMEM region.
315 umem: OwnedMmap,
316 /// The owned file descriptor for the AF_XDP socket.
317 fd: OwnedFd,
318}
319
320impl Inner {
321 /// Constructs a new `Inner` with the given UMEM and file descriptor.
322 pub(crate) fn new(umem: OwnedMmap, fd: OwnedFd) -> Self {
323 Self { umem, fd }
324 }
325}