kvarn_quinn_udp/lib.rs
1//! Uniform interface to send and receive UDP packets with advanced features useful for QUIC
2//!
3//! This crate exposes kernel UDP stack features available on most modern systems which are required
4//! for an efficient and conformant QUIC implementation. As of this writing, these are not available
5//! in std or major async runtimes, and their niche character and complexity are a barrier to adding
6//! them. Hence, a dedicated crate.
7//!
8//! Exposed features include:
9//!
10//! - Segmentation offload for bulk send and receive operations, reducing CPU load.
11//! - Reporting the exact destination address of received packets and specifying explicit source
12//! addresses for sent packets, allowing responses to be sent from the address that the peer
13//! expects when there are multiple possibilities. This is common when bound to a wildcard address
14//! in IPv6 due to [RFC 8981] temporary addresses.
15//! - [Explicit Congestion Notification], which is required by QUIC to prevent packet loss and reduce
16//! latency on congested links when supported by the network path.
17//! - Disabled IP-layer fragmentation, which allows the true physical MTU to be detected and reduces
18//! risk of QUIC packet loss.
19//!
20//! Some features are unavailable in some environments. This can be due to an outdated operating
21//! system or drivers. Some operating systems may not implement desired features at all, or may not
22//! yet be supported by the crate. When support is unavailable, functionality will gracefully
23//! degrade.
24//!
25//! [RFC 8981]: https://www.rfc-editor.org/rfc/rfc8981.html
26//! [Explicit Congestion Notification]: https://www.rfc-editor.org/rfc/rfc3168.html
27#![warn(unreachable_pub)]
28#![warn(clippy::use_self)]
29
30#[cfg(unix)]
31use std::os::unix::io::AsFd;
32#[cfg(windows)]
33use std::os::windows::io::AsSocket;
34use std::{
35 net::{IpAddr, Ipv6Addr, SocketAddr},
36 sync::Mutex,
37 time::{Duration, Instant},
38};
39
40use bytes::Bytes;
41use tracing::warn;
42
43#[cfg(any(unix, windows))]
44mod cmsg;
45
46#[cfg(unix)]
47#[path = "unix.rs"]
48mod imp;
49
50#[cfg(windows)]
51#[path = "windows.rs"]
52mod imp;
53
54// No ECN support
55#[cfg(not(any(unix, windows)))]
56#[path = "fallback.rs"]
57mod imp;
58
59pub use imp::UdpSocketState;
60
61/// Number of UDP packets to send/receive at a time
62pub const BATCH_SIZE: usize = imp::BATCH_SIZE;
63
64/// Metadata for a single buffer filled with bytes received from the network
65///
66/// This associated buffer can contain one or more datagrams, see [`stride`].
67///
68/// [`stride`]: RecvMeta::stride
69#[derive(Debug, Copy, Clone)]
70pub struct RecvMeta {
71 /// The source address of the datagram(s) contained in the buffer
72 pub addr: SocketAddr,
73 /// The number of bytes the associated buffer has
74 pub len: usize,
75 /// The size of a single datagram in the associated buffer
76 ///
77 /// When GRO (Generic Receive Offload) is used this indicates the size of a single
78 /// datagram inside the buffer. If the buffer is larger, that is if [`len`] is greater
79 /// then this value, then the individual datagrams contained have their boundaries at
80 /// `stride` increments from the start. The last datagram could be smaller than
81 /// `stride`.
82 ///
83 /// [`len`]: RecvMeta::len
84 pub stride: usize,
85 /// The Explicit Congestion Notification bits for the datagram(s) in the buffer
86 pub ecn: Option<EcnCodepoint>,
87 /// The destination IP address which was encoded in this datagram
88 pub dst_ip: Option<IpAddr>,
89}
90
91impl Default for RecvMeta {
92 /// Constructs a value with arbitrary fields, intended to be overwritten
93 fn default() -> Self {
94 Self {
95 addr: SocketAddr::new(Ipv6Addr::UNSPECIFIED.into(), 0),
96 len: 0,
97 stride: 0,
98 ecn: None,
99 dst_ip: None,
100 }
101 }
102}
103
104/// An outgoing packet
105#[derive(Debug, Clone)]
106pub struct Transmit {
107 /// The socket this datagram should be sent to
108 pub destination: SocketAddr,
109 /// Explicit congestion notification bits to set on the packet
110 pub ecn: Option<EcnCodepoint>,
111 /// Contents of the datagram
112 pub contents: Bytes,
113 /// The segment size if this transmission contains multiple datagrams.
114 /// This is `None` if the transmit only contains a single datagram
115 pub segment_size: Option<usize>,
116 /// Optional source IP address for the datagram
117 pub src_ip: Option<IpAddr>,
118}
119
/// Minimum spacing between logged IO errors: at most one per minute
const IO_ERROR_LOG_INTERVAL: Duration = Duration::from_secs(60);
122
123/// Logs a warning message when sendmsg fails
124///
125/// Logging will only be performed if at least [`IO_ERROR_LOG_INTERVAL`]
126/// has elapsed since the last error was logged.
127fn log_sendmsg_error(
128 last_send_error: &Mutex<Instant>,
129 err: impl core::fmt::Debug,
130 transmit: &Transmit,
131) {
132 let now = Instant::now();
133 let last_send_error = &mut *last_send_error.lock().expect("poisend lock");
134 if now.saturating_duration_since(*last_send_error) > IO_ERROR_LOG_INTERVAL {
135 *last_send_error = now;
136 warn!(
137 "sendmsg error: {:?}, Transmit: {{ destination: {:?}, src_ip: {:?}, enc: {:?}, len: {:?}, segment_size: {:?} }}",
138 err, transmit.destination, transmit.src_ip, transmit.ecn, transmit.contents.len(), transmit.segment_size);
139 }
140}
141
142/// A borrowed UDP socket
143///
144/// On Unix, constructible via `From<T: AsRawFd>`. On Windows, constructible via `From<T:
145/// AsRawSocket>`.
146// Wrapper around socket2 to avoid making it a public dependency and incurring stability risk
147pub struct UdpSockRef<'a>(socket2::SockRef<'a>);
148
149#[cfg(unix)]
150impl<'s, S> From<&'s S> for UdpSockRef<'s>
151where
152 S: AsFd,
153{
154 fn from(socket: &'s S) -> Self {
155 Self(socket.into())
156 }
157}
158
/// Borrows any type exposing a socket handle via `AsSocket` as a [`UdpSockRef`]
#[cfg(windows)]
impl<'s, S: AsSocket> From<&'s S> for UdpSockRef<'s> {
    fn from(socket: &'s S) -> Self {
        let inner = socket2::SockRef::from(socket);
        Self(inner)
    }
}
168
/// Explicit congestion notification codepoint
///
/// Each variant's discriminant is the two-bit pattern that [`EcnCodepoint::from_bits`] maps to
/// it. There is no variant for the pattern `0b00`.
#[repr(u8)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum EcnCodepoint {
    #[doc(hidden)]
    Ect0 = 0b10,
    #[doc(hidden)]
    Ect1 = 0b01,
    #[doc(hidden)]
    Ce = 0b11,
}

impl EcnCodepoint {
    /// Decodes a codepoint from the two least significant bits of `x`
    ///
    /// All higher bits are ignored. Returns `None` when the low two bits are `0b00`.
    pub fn from_bits(x: u8) -> Option<Self> {
        match x & 0b11 {
            0b01 => Some(Self::Ect1),
            0b10 => Some(Self::Ect0),
            0b11 => Some(Self::Ce),
            _ => None,
        }
    }
}
194}