xdp_socket/create.rs
1//! # AF_XDP Socket Creation and Configuration
2//!
3//! ## Purpose
4//!
5//! This file contains the logic for creating and configuring AF_XDP sockets. It provides
6//! a high-level API to set up sockets for transmit-only, receive-only, or
7//! bidirectional packet processing, abstracting away many of the low-level details.
8//!
9//! ## How it works
10//!
11//! It uses `libc` syscalls to create a raw AF_XDP socket. It then allocates a UMEM
12//! (Userspace Memory) region for zero-copy data transfers, configures the necessary
13//! rings (TX, RX, Fill, Completion) with appropriate sizes, maps them into memory,
14//! and binds the socket to a specific network interface and queue. The logic handles
15//! different UMEM and ring configurations based on whether the socket is for TX, RX,
16//! or both.
17//!
18//! ## Main components
19//!
//! - `create_socket()`: The core function that handles the detailed setup logic.
21//! - `create_tx_socket()`, `create_rx_socket()`, `create_bi_socket()`: Safe public
22//! functions that wrap `create_socket` for specific use cases.
23//! - `setup_umem()`: A helper function to allocate and register the UMEM with the kernel.
24//! - `ring_offsets()`: A helper to query the kernel for the memory map offsets of the rings.
25//! - `XdpConfig`, `Direction`: Public structs and enums for socket configuration.
26
27use crate::mmap::OwnedMmap;
28use crate::ring::{FRAME_COUNT, FRAME_SIZE, Ring, RingType, XdpDesc};
29use crate::socket::{Inner, RxSocket, TxSocket};
30use std::io;
31use std::mem::size_of;
32use std::os::fd::{FromRawFd as _, OwnedFd};
33use std::sync::Arc;
34
35/// Creates one or two sockets for AF_XDP packet processing.
36///
37/// This is the core function for setting up AF_XDP sockets. It handles UMEM
38/// allocation, ring configuration, and binding to a network interface queue.
39///
40/// # How it works
41///
42/// 1. Creates a raw `AF_XDP` socket.
43/// 2. Calls `setup_umem` to create a memory-mapped UMEM region.
44/// 3. Sets the sizes for the Fill, Completion, TX, and RX rings via `setsockopt`.
45/// 4. Retrieves the memory map offsets for the rings from the kernel.
46/// 5. Memory-maps the required rings based on the specified `Direction`.
47/// 6. Binds the socket to the given interface index and queue ID, enabling zero-copy
48/// and need-wakeup flags based on the config.
49/// 7. Wraps the components in `TxSocket` and/or `RxSocket` and returns them.
50///
51/// # Arguments
52/// * `if_index` - The index of the network interface to bind to.
53/// * `if_queue` - The queue index of the interface to bind to.
54/// * `direction` - The desired direction(s) for the socket (`Tx`, `Rx`, or `Both`).
55/// * `config` - Optional configuration for zero-copy, huge pages, etc.
56///
57/// # Returns
58/// A tuple `(Option<TxSocket>, Option<RxSocket>)`. The appropriate socket(s) will be
59/// `Some` based on the `direction`.
60///
61/// # Safety
62/// This function is unsafe because it directly interfaces with low-level Linux APIs.
63/// The caller must ensure the provided parameters are valid.
64pub fn create_socket(
65 if_index: u32,
66 if_queue: u32,
67 direction: Direction,
68 config: Option<XdpConfig>,
69) -> Result<(Option<TxSocket>, Option<RxSocket>), io::Error> {
70 let (rx_ring_size, tx_ring_size) = match direction {
71 Direction::Tx => (0, FRAME_COUNT), // all frames for outgoing packets
72 Direction::Rx => (FRAME_COUNT, 0), // all frames for incoming packets
73 Direction::Both => (FRAME_COUNT / 2, FRAME_COUNT / 2), // split frames for both directions
74 };
75
76 let (fd, raw_fd) = unsafe {
77 let fd = libc::socket(libc::AF_XDP, libc::SOCK_RAW | libc::SOCK_CLOEXEC, 0);
78 if fd < 0 {
79 return Err(io::Error::last_os_error());
80 }
81 (OwnedFd::from_raw_fd(fd), fd)
82 };
83 let umem = setup_umem(raw_fd, config.as_ref())?;
84
85 RingType::Fill.set_size(raw_fd, tx_ring_size)?;
86 RingType::Completion.set_size(raw_fd, tx_ring_size)?;
87 if tx_ring_size > 0 {
88 RingType::Tx.set_size(raw_fd, tx_ring_size)?;
89 }
90 if rx_ring_size > 0 {
91 RingType::Rx.set_size(raw_fd, rx_ring_size)?;
92 }
93
94 let offsets = ring_offsets(raw_fd)?;
95
96 // Mapping Tx rings in case of Tx and Both direction
97 let (c_ring, tx_ring) = if direction == Direction::Rx {
98 (Ring::default(), Ring::default())
99 } else {
100 (
101 RingType::Completion.mmap(raw_fd, &offsets, tx_ring_size)?,
102 {
103 let mut tx_ring: Ring<XdpDesc> =
104 RingType::Tx.mmap(raw_fd, &offsets, tx_ring_size)?;
105 tx_ring.fill(0);
106 tx_ring
107 },
108 )
109 };
110
111 // Mapping Rx rings in case of Rx and Both direction
112 let (rx_ring, f_ring) = if direction == Direction::Tx {
113 (Ring::default(), Ring::default())
114 } else {
115 (RingType::Rx.mmap(raw_fd, &offsets, rx_ring_size)?, {
116 let mut f_ring: Ring<u64> = RingType::Fill.mmap(raw_fd, &offsets, rx_ring_size)?;
117 f_ring.fill(tx_ring_size as u32);
118 f_ring.update_producer(f_ring.len as u32);
119 f_ring
120 })
121 };
122
123 let zero_copy = match config.and_then(|cfg| cfg.zero_copy) {
124 Some(true) => libc::XDP_ZEROCOPY,
125 Some(false) => libc::XDP_COPY,
126 None => 0,
127 };
128
129 let need_wakeup = if config.and_then(|cfg| cfg.need_wakeup).unwrap_or(true) {
130 libc::XDP_USE_NEED_WAKEUP
131 } else {
132 0
133 };
134
135 let sxdp = libc::sockaddr_xdp {
136 sxdp_family: libc::AF_XDP as libc::sa_family_t,
137 sxdp_flags: need_wakeup | zero_copy,
138 sxdp_ifindex: if_index,
139 sxdp_queue_id: if_queue,
140 sxdp_shared_umem_fd: 0,
141 };
142
143 if unsafe {
144 libc::bind(
145 raw_fd,
146 &sxdp as *const _ as *const libc::sockaddr,
147 size_of::<libc::sockaddr_xdp>() as libc::socklen_t,
148 ) < 0
149 } {
150 return Err(io::Error::other(format!(
151 "Failed to bind: {}",
152 io::Error::last_os_error()
153 )));
154 }
155
156 // its just owned shared memory and socket descriptor
157 // that we can share between Tx and Rx sockets
158 // to release it when both are destroyed
159 #[allow(clippy::arc_with_non_send_sync)]
160 let inner = Arc::new(Inner::new(umem, fd));
161
162 let tx_socket = if direction != Direction::Rx {
163 Some(TxSocket::new(Some(inner.clone()), tx_ring, c_ring))
164 } else {
165 None
166 };
167
168 let rx_socket = if direction != Direction::Tx {
169 Some(RxSocket::new(Some(inner.clone()), rx_ring, f_ring))
170 } else {
171 None
172 };
173
174 Ok((tx_socket, rx_socket))
175}
176
177/// Creates a `TxSocket` for sending packets.
178///
179/// This is a convenience wrapper around `create_socket` for transmit-only use cases.
180///
181/// # Arguments
182/// * `if_index` - The index of the network interface to use.
183/// * `if_queue` - The queue ID of the network interface to use.
184/// * `config` - Optional `XdpConfig` to customize the socket.
185///
186/// # Returns
187/// A `Result` containing a `TxSocket` on success, or an `io::Error` on failure.
188pub fn create_tx_socket(
189 if_index: u32,
190 if_queue: u32,
191 config: Option<XdpConfig>,
192) -> Result<TxSocket, io::Error> {
193 let (tx_socket, _) = create_socket(if_index, if_queue, Direction::Tx, config)?;
194 tx_socket.ok_or_else(|| io::Error::other("Failed to create Tx socket"))
195}
196
197/// Creates an `RxSocket` for receiving packets.
198///
199/// This is a convenience wrapper around `create_socket` for receive-only use cases.
200///
201/// # Arguments
202/// * `if_index` - The index of the network interface to use.
203/// * `if_queue` - The queue ID of the network interface to use.
204/// * `config` - Optional `XdpConfig` to customize the socket.
205///
206/// # Returns
207/// A `Result` containing an `RxSocket` on success, or an `io::Error` on failure.
208pub fn create_rx_socket(
209 if_index: u32,
210 if_queue: u32,
211 config: Option<XdpConfig>,
212) -> Result<RxSocket, io::Error> {
213 let (_, rx_socket) = create_socket(if_index, if_queue, Direction::Rx, config)?;
214 rx_socket.ok_or_else(|| io::Error::other("Failed to create Rx socket"))
215}
216
217/// Creates a pair of sockets (`TxSocket`, `RxSocket`) for bidirectional communication.
218///
219/// This is a convenience wrapper around `create_socket` for bidirectional use cases.
220/// The UMEM frame pool is split between the two sockets.
221///
222/// # Arguments
223/// * `if_index` - The index of the network interface to use.
224/// * `if_queue` - The queue ID of the network interface to use.
225/// * `config` - Optional `XdpConfig` to customize the sockets.
226///
227/// # Returns
228/// A `Result` containing a tuple of `(TxSocket, RxSocket)` on success, or an `io::Error` on failure.
229pub fn create_bi_socket(
230 if_index: u32,
231 if_queue: u32,
232 config: Option<XdpConfig>,
233) -> Result<(TxSocket, RxSocket), io::Error> {
234 let (tx_socket, rx_socket) = create_socket(if_index, if_queue, Direction::Both, config)?;
235 Ok((
236 tx_socket.ok_or_else(|| io::Error::other("Failed to create Tx socket"))?,
237 rx_socket.ok_or_else(|| io::Error::other("Failed to create Rx socket"))?,
238 ))
239}
240
241/// Retrieves the memory map offsets for the AF_XDP rings from the kernel.
242///
243/// This function uses `getsockopt` with `XDP_MMAP_OFFSETS` to query the kernel for
244/// the correct offsets of the producer/consumer indices and descriptor arrays for
245/// all four rings.
246///
247/// # Arguments
248/// * `raw_fd` - The raw file descriptor of the AF_XDP socket.
249pub fn ring_offsets(raw_fd: libc::c_int) -> io::Result<libc::xdp_mmap_offsets> {
250 let mut offsets: libc::xdp_mmap_offsets = unsafe { std::mem::zeroed() };
251 let mut optlen = size_of::<libc::xdp_mmap_offsets>() as libc::socklen_t;
252 unsafe {
253 if libc::getsockopt(
254 raw_fd,
255 libc::SOL_XDP,
256 libc::XDP_MMAP_OFFSETS,
257 &mut offsets as *mut _ as *mut libc::c_void,
258 &mut optlen,
259 ) < 0
260 {
261 return Err(io::Error::last_os_error());
262 }
263 }
264 Ok(offsets)
265}
266
267/// Allocates and registers the UMEM (Userspace Memory) region with the kernel.
268///
269/// # How it works
270///
271/// 1. It calls `OwnedMmap::mmap` to create a memory-mapped region, optionally
272/// backed by huge pages.
273/// 2. It populates an `xdp_umem_reg` struct with the address and size of the UMEM.
274/// 3. It calls `setsockopt` with `XDP_UMEM_REG` to register the UMEM with the
275/// kernel, making it available for zero-copy operations.
276///
277/// # Arguments
278/// * `raw_fd` - The raw file descriptor of the AF_XDP socket.
279/// * `config` - Optional configuration, used to determine if huge pages should be used.
280pub fn setup_umem(raw_fd: libc::c_int, config: Option<&XdpConfig>) -> io::Result<OwnedMmap> {
281 let umem = OwnedMmap::mmap(
282 FRAME_COUNT * FRAME_SIZE,
283 config.and_then(|cfg| cfg.huge_page),
284 )
285 .map_err(|e| io::Error::other(format!("Failed to allocate UMEM: {}", e)))?;
286
287 let reg = unsafe {
288 libc::xdp_umem_reg {
289 addr: umem.as_void_ptr() as u64,
290 len: umem.len() as u64,
291 chunk_size: FRAME_SIZE as u32,
292 ..std::mem::zeroed()
293 }
294 };
295
296 unsafe {
297 if libc::setsockopt(
298 raw_fd,
299 libc::SOL_XDP,
300 libc::XDP_UMEM_REG,
301 ® as *const _ as *const libc::c_void,
302 size_of::<libc::xdp_umem_reg>() as libc::socklen_t,
303 ) < 0
304 {
305 return Err(io::Error::other(format!(
306 "Failed to register UMEM: {}",
307 io::Error::last_os_error()
308 )));
309 }
310 }
311
312 Ok(umem)
313}
314
/// Specifies the direction of an AF_XDP socket.
///
/// The enum is `#[repr(i32)]` with explicit discriminants, so each variant has
/// a fixed integer value. It is a field-less enum, so equality is total and
/// `Eq` is derived alongside `PartialEq`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(i32)]
pub enum Direction {
    /// Transmit-only socket.
    Tx = 0,
    /// Receive-only socket.
    Rx = 1,
    /// Bidirectional socket (both transmit and receive).
    Both = -1,
}
326
/// Tunables accepted by the AF_XDP socket constructors in this module.
///
/// Every field is optional; `XdpConfig::default()` leaves all of them unset.
#[derive(Clone, Copy, Debug, Default)]
pub struct XdpConfig {
    /// Copy-mode preference passed to `bind`.
    ///
    /// - `Some(true)`: `XDP_ZEROCOPY` is set.
    /// - `Some(false)`: `XDP_COPY` is set.
    /// - `None`: neither flag is set, leaving the choice of mode to the kernel.
    pub zero_copy: Option<bool>,
    /// Huge-page preference for the UMEM mapping.
    ///
    /// The value is forwarded as-is to `OwnedMmap::mmap`:
    ///
    /// - `Some(true)`: huge pages are requested.
    /// - `Some(false)`: huge pages are not requested.
    /// - `None`: no preference is expressed; the mapping code decides.
    pub huge_page: Option<bool>,
    /// Controls the `XDP_USE_NEED_WAKEUP` bind flag.
    ///
    /// - `Some(true)`: the flag is set.
    /// - `Some(false)`: the flag is not set.
    /// - `None`: treated the same as `Some(true)`.
    pub need_wakeup: Option<bool>,
}