xdp_socket/
socket.rs

1//! # XDP Socket Implementation
2//!
3//! ## Purpose
4//!
5//! This file implements the `Socket` struct, which provides a high-level interface for
6//! interacting with XDP sockets. It supports both sending (TX) and receiving (RX) of
7//! packets with high performance through zero-copy data transfers.
8//!
9//! ## How it works
10//!
11//! The `Socket` utilizes two main components for its operation: a UMEM (Userspace Memory)
12//! region and associated rings for communication with the kernel. The UMEM is a memory-mapped
13//! area shared between the userspace application and the kernel, which allows for zero-copy
14//! packet processing.
15//!
16//! - For sending packets (TX), the application writes packet data directly into frames within
17//!   the UMEM and then pushes descriptors to the TX ring, signaling the kernel to send them.
18//! - For receiving packets (RX), the application provides the kernel with descriptors pointing
19//!   to free frames in the UMEM via the Fill ring. The kernel writes incoming packet data
20//!   into these frames and notifies the application through the RX ring.
21//!
22//! ## Main components
23//!
24//! - `Socket<const t:_Direction>`: The primary struct representing an XDP socket. It is
25//!   generic over the direction (TX or RX) to provide a type-safe API for each use case.
26//! - `Ring<T>`: A generic ring buffer implementation that is used for the TX/RX rings and
27//!   the Fill/Completion rings for UMEM.
28//! - `Inner`: A struct that holds the owned file descriptor for the XDP socket and the
29//!   memory-mapped UMEM region.
30//! - `TxSocket` and `RxSocket`: Type aliases for `Socket<true>` and `Socket<false>`
31//!   respectively, providing a more intuitive API for users.
32
// The public `Socket` impl below is bounded on the crate-private `Seek_` and
// `Commit_` traits, which is what the `private_bounds` lint reports.
// NOTE(review): presumably `private_interfaces` is silenced for a similar
// crate-private item leaking through a public signature — confirm where it fires.
#![allow(private_interfaces)]
#![allow(private_bounds)]
// The const generic direction parameter is spelled as a lowercase `t` in this file.
#![allow(non_upper_case_globals)]
36
37use crate::mmap::OwnedMmap;
38use crate::ring::{Ring, XdpDesc};
39use std::os::fd::{AsRawFd as _, OwnedFd};
40use std::{io, ptr};
41use std::fmt::Display;
42use std::sync::Arc;
43use std::time::Duration;
44
/// A high-level interface for an AF_XDP socket.
///
/// This struct is generic over the `_Direction` const parameter, which determines
/// whether the socket is for sending (`_TX`) or receiving (`_RX`).
///
/// A socket built through `Default` has no inner state (`_inner` is `None`), a null
/// `frames` pointer and default-constructed rings.
pub struct Socket<const t: _Direction> {
    /// The inner shared state, including the file descriptor and UMEM.
    ///
    /// `None` for a default-constructed socket.
    pub(crate) _inner: Option<Arc<Inner>>,
    /// The primary ring for sending (TX) or receiving (RX) descriptors.
    pub(crate) x_ring: Ring<XdpDesc>,
    /// The UMEM-associated ring: Completion Ring for TX, Fill Ring for RX.
    pub(crate) u_ring: Ring<u64>,
    /// The number of available descriptors in the `x_ring`.
    ///
    /// Initialised to the length of `x_ring` by `Socket::new`.
    pub(crate) available: u32,
    /// The cached producer index for the `x_ring`.
    pub(crate) producer: u32,
    /// The cached consumer index for the `x_ring`.
    pub(crate) consumer: u32,
    /// A raw pointer to the start of the UMEM frames area.
    ///
    /// Taken from the UMEM mapping held by `_inner`; null when there is no inner state.
    pub(crate) frames: *mut u8,
    /// The raw file descriptor of the socket, cached from the owned descriptor in
    /// `_inner` at construction time and passed to `poll` by `poll_wait`.
    ///
    /// It is `0` for a default-constructed socket.
    pub(crate) raw_fd: libc::c_int,
}
67
/// An error that can occur during ring operations.
///
/// The type implements [`std::error::Error`], so it can be boxed as
/// `Box<dyn std::error::Error>` and used with `?` in functions returning such a
/// boxed error. For the [`RingError::Io`] variant the wrapped [`io::Error`] is
/// exposed through `source()`.
#[derive(Debug)]
pub enum RingError {
    /// The ring is full, and no more descriptors can be produced.
    RingFull,
    /// The ring is empty, and no descriptors can be consumed.
    RingEmpty,
    /// Not enough descriptors or frames are available for the requested operation.
    NotAvailable,
    /// An invalid index was used to access a ring descriptor.
    InvalidIndex,
    /// The provided data length exceeds the available space in a UMEM frame.
    InvalidLength,
    /// An underlying I/O error occurred.
    Io(io::Error),
}

impl Display for RingError {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            RingError::RingFull => "Ring is full",
            RingError::RingEmpty => "Ring is empty",
            RingError::NotAvailable => "Not enough available frames",
            RingError::InvalidIndex => "Invalid index for ring access",
            RingError::InvalidLength => "Invalid length for ring access",
            // The I/O variant is the only one that carries a payload to format.
            RingError::Io(e) => return write!(f, "I/O error: {e}"),
        };
        f.write_str(message)
    }
}

impl std::error::Error for RingError {
    /// Returns the wrapped [`io::Error`] for [`RingError::Io`], `None` otherwise.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            RingError::Io(e) => Some(e),
            RingError::RingFull
            | RingError::RingEmpty
            | RingError::NotAvailable
            | RingError::InvalidIndex
            | RingError::InvalidLength => None,
        }
    }
}
97
98
99impl<const t: _Direction> Socket<t>
100where
101    Socket<t>: Seek_<t> + Commit_<t> + Send,
102{
103    /// Constructs a new `Socket`.
104    ///
105    /// This function initializes a socket for either sending or receiving based on the
106    /// generic const `t`. For TX sockets, it pre-fills the TX ring with descriptors
107    /// pointing to UMEM frames. For RX sockets, it pre-fills the Fill ring to provide
108    /// the kernel with available frames for incoming packets.
109    ///
110    /// # Arguments
111    ///
112    /// * `inner` - The shared inner socket state (file descriptor, UMEM).
113    /// * `x_ring` - The TX or RX ring.
114    /// * `u_ring` - The Completion or Fill ring.
115    /// * `skip_frames` - The number of frames to skip at the start of the UMEM.
116    pub(crate) fn new(
117        inner: Option<Arc<Inner>>,
118        mut x_ring: Ring<XdpDesc>,
119        mut u_ring: Ring<u64>,
120        skip_frames: usize,
121    ) -> Self {
122        if let Some(inner) = inner {
123            match t {
124                _TX => {
125                    // all frames available for sending packets
126                    x_ring.fill(skip_frames as u32);
127                }
128                _RX => {
129                    // all frames available for receiving packets
130                    u_ring.fill(skip_frames as u32);
131                    u_ring.update_producer(u_ring.len as u32);
132                }
133            };
134            let frames = inner.umem.0 as *mut u8;
135            let raw_fd = inner.fd.as_raw_fd();
136            Self {
137                frames,
138                available: x_ring.len as u32,
139                producer: 0,
140                consumer: 0,
141                raw_fd,
142                _inner: Some(inner),
143                x_ring,
144                u_ring,
145            }
146        } else {
147            Self::default()
148        }
149    }
150
151    /// Waits for the socket to become ready for I/O, blocking until an event occurs.
152    ///
153    /// This function uses `poll` to wait for the socket's file descriptor to become
154    /// ready. For a `TxSocket`, it waits for `POLLOUT` (writable). For an `RxSocket`,
155    /// it waits for `POLLIN` (readable).
156    ///
157    /// # Arguments
158    ///
159    /// * `_timeout` - An optional timeout. If `None`, it blocks indefinitely.
160    ///
161    /// # Returns
162    ///
163    /// An `io::Result` indicating success or failure.
164    pub fn poll_wait(&self, _timeout: Option<Duration>) -> Result<(), io::Error> {
165        self.kick()?;
166        let mask = match t {
167            _TX => libc::POLLOUT,
168            _RX => libc::POLLIN,
169        };
170        unsafe {
171            loop {
172                let mut fds = [libc::pollfd {
173                    events: mask,
174                    revents: 0,
175                    fd: self.raw_fd,
176                }];
177                if 0 > libc::poll(fds.as_mut_ptr(), 1, -1) {
178                    //..
179                } else if (fds[0].revents & mask) != 0 {
180                    break;
181                }
182            }
183        }
184        Ok(())
185    }
186    
187    /// Ensures that at least one descriptor is available for the next operation and
188    /// returns the total number of available descriptors.
189    ///
190    /// For a `TxSocket`, this may involve reclaiming completed descriptors from the
191    /// Completion Ring. For an `RxSocket`, this checks for newly received packets.
192    ///
193    /// # Returns
194    ///
195    /// A `Result` containing the total number of available descriptors, or a
196    /// `RingError` if the operation fails.
197    #[inline]
198    pub fn seek(&mut self) -> Result<usize, RingError> {
199        self.seek_(1)
200    }
201
202    /// Ensures that at least `count` descriptors are available for the next operation
203    /// and returns the total number of available descriptors.
204    ///
205    /// For a `TxSocket`, this may involve reclaiming completed descriptors from the
206    /// Completion Ring. For an `RxSocket`, this checks for newly received packets.
207    ///
208    /// # Arguments
209    ///
210    /// * `count` - The desired number of available descriptors.
211    ///
212    /// # Returns
213    ///
214    /// A `Result` containing the total number of available descriptors, or a
215    /// `RingError` if the operation fails.
216    #[inline]
217    pub fn seek_n(&mut self, count: usize) -> Result<usize, RingError> {
218        self.seek_(count)
219    }
220
221    /// Commits one descriptor, making it available to the kernel.
222    ///
223    /// For a `TxSocket`, this signals to the kernel that a packet written to the
224    /// corresponding UMEM frame is ready to be sent.
225    ///
226    /// For an `RxSocket`, this returns a UMEM frame to the kernel's Fill Ring after
227    /// the application has finished processing the received packet, making the frame
228    /// available for new packets.
229    ///
230    /// # Returns
231    ///
232    /// `Ok(())` on success, or a `RingError` on failure.
233    #[inline]
234    pub fn commit(&mut self) -> Result<(), RingError> {
235        self.commit_(1)
236    }
237
238    /// Commits `n` descriptors, making them available to the kernel.
239    ///
240    /// For a `TxSocket`, this signals to the kernel that `n` packets are ready to be sent.
241    /// For an `RxSocket`, this returns `n` UMEM frames to the kernel's Fill Ring.
242    ///
243    /// # Arguments
244    ///
245    /// * `n` - The number of descriptors to commit.
246    ///
247    /// # Returns
248    ///
249    /// `Ok(())` on success, or a `RingError` on failure.
250    #[inline]
251    pub fn commit_n(&mut self, n: usize) -> Result<(), RingError> {
252        self.commit_(n)
253    }
254
255
256    /// Returns the size of a single frame in the UMEM.
257    ///
258    /// # Returns
259    ///
260    /// The size of a single frame in the UMEM in bytes.
261    #[inline]
262    pub fn frame_size(&self) -> usize {
263        self.x_ring.frame_size() as usize
264    }
265}
266
// SAFETY (as argued by the original author): the raw pointers held by a `Socket`
// refer to shared mapped memory owned by `_inner` and the rings, so they can be
// safely sent to another thread for as long as that mapped memory stays alive.
unsafe impl<const t:_Direction> Send for Socket<t> {}
271
272
/// A boolean flag indicating the direction of the socket (`true` for TX, `false` for RX).
///
/// It is used as the const generic parameter of `Socket`.
pub type _Direction = bool;

/// Constant representing the Transmit (TX) direction (`true`).
pub const _TX: _Direction = true;

/// Constant representing the Receive (RX) direction (`false`).
pub const _RX: _Direction = false;

/// A type alias for a socket configured for sending packets, i.e. `Socket<_TX>`.
pub type TxSocket = Socket<_TX>;

/// A type alias for a socket configured for receiving packets, i.e. `Socket<_RX>`.
pub type RxSocket = Socket<_RX>;
287
288impl<const t: _Direction> Default for Socket<t> {
289    fn default() -> Self {
290        Self {
291            _inner: None,
292            x_ring: Default::default(),
293            u_ring: Default::default(),
294            available: 0,
295            producer: 0,
296            consumer: 0,
297            frames: ptr::null_mut(),
298            raw_fd: 0
299        }
300    }
301}
302
/// A trait for direction-specific seeking logic (TX vs. RX).
///
/// `Socket::seek` and `Socket::seek_n` forward to this trait.
pub(crate) trait Seek_<const t: _Direction> {
    /// Ensures `count` descriptors are available and returns the number available,
    /// or a `RingError` on failure.
    fn seek_(&mut self, count: usize) -> Result<usize, RingError>;
}
307
/// A trait for direction-specific commit logic (TX vs. RX).
///
/// `Socket::commit` and `Socket::commit_n` forward to this trait.
pub(crate) trait Commit_<const t: _Direction> {
    /// Commits `count` descriptors, returning a `RingError` on failure.
    fn commit_(&mut self, count: usize) -> Result<(), RingError>;
}
311
/// Holds the owned components of an XDP socket that can be shared.
///
/// `Socket` keeps it behind an `Arc` in its `_inner` field.
pub(crate) struct Inner {
    /// The memory-mapped UMEM region.
    umem: OwnedMmap,
    /// The owned file descriptor for the AF_XDP socket.
    fd: OwnedFd,
}
319
320impl Inner {
321    /// Constructs a new `Inner` with the given UMEM and file descriptor.
322    pub(crate) fn new(umem: OwnedMmap, fd: OwnedFd) -> Self {
323        Self { umem, fd }
324    }
325}