s2n_quic_core/path/mtu.rs
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5 counter::{Counter, Saturating},
6 event::{self, builder::MtuUpdatedCause, IntoEvent},
7 frame, inet,
8 packet::number::PacketNumber,
9 path::{self, mtu},
10 recovery::{congestion_controller, CongestionController},
11 time::{timer, Timer, Timestamp},
12 transmission,
13};
14use core::{
15 fmt,
16 fmt::{Display, Formatter},
17 num::NonZeroU16,
18 time::Duration,
19};
20use s2n_codec::EncoderValue;
21
22#[cfg(test)]
23mod tests;
24
25#[cfg(any(test, feature = "testing"))]
26pub mod testing {
27 use super::*;
28 use crate::inet::{IpV4Address, SocketAddressV4};
29
30 /// Creates a new mtu::Controller with an IPv4 address and the given `max_mtu`
31 pub fn new_controller(max_mtu: u16) -> Controller {
32 let ip = IpV4Address::new([127, 0, 0, 1]);
33 let addr = inet::SocketAddress::IpV4(SocketAddressV4::new(ip, 443));
34 Controller::new(
35 Config {
36 max_mtu: max_mtu.try_into().unwrap(),
37 ..Default::default()
38 },
39 &addr,
40 )
41 }
42
43 /// Creates a new mtu::Controller with the given mtu and probed size
44 pub fn test_controller(mtu: u16, probed_size: u16) -> Controller {
45 let mut controller = new_controller(u16::MAX);
46 controller.plpmtu = mtu;
47 controller.probed_size = probed_size;
48 controller
49 }
50}
51
52#[derive(Clone, Debug, PartialEq, Eq)]
53enum State {
54 /// EARLY_SEARCH_REQUESTED indicates the initial MTU was configured higher
55 /// than the base MTU, to allow for quick confirmation or rejection of the
56 /// initial MTU
57 EarlySearchRequested,
58 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.2
59 //# The DISABLED state is the initial state before probing has started.
60 Disabled,
61 /// SEARCH_REQUESTED is used to indicate a probe packet has been requested
62 /// to be transmitted, but has not been transmitted yet.
63 SearchRequested,
64 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.2
65 //# The SEARCHING state is the main probing state.
66 Searching(PacketNumber, Timestamp),
67 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.2
68 //# The SEARCH_COMPLETE state indicates that a search has completed.
69 SearchComplete,
70}
71
72impl State {
73 /// Returns true if the MTU controller is in the early search requested state
74 fn is_early_search_requested(&self) -> bool {
75 matches!(self, State::EarlySearchRequested)
76 }
77
78 /// Returns true if the MTU controller is in the disabled state
79 fn is_disabled(&self) -> bool {
80 matches!(self, State::Disabled)
81 }
82
83 /// Returns true if the MTU controller is in the search complete state
84 fn is_search_complete(&self) -> bool {
85 matches!(self, State::SearchComplete)
86 }
87}
88
89//= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.2
90//# The MAX_PROBES is the maximum value of the PROBE_COUNT
91//# counter (see Section 5.1.3). MAX_PROBES represents the limit for
92//# the number of consecutive probe attempts of any size. Search
93//# algorithms benefit from a MAX_PROBES value greater than 1 because
94//# this can provide robustness to isolated packet loss. The default
95//# value of MAX_PROBES is 3.
96const MAX_PROBES: u8 = 3;
97
98/// The minimum length of the data field of a packet sent over an
99/// Ethernet is 1500 octets, thus the maximum length of an IP datagram
100/// sent over an Ethernet is 1500 octets.
101/// See https://www.rfc-editor.org/rfc/rfc894.txt
102const ETHERNET_MTU: u16 = 1500;
103
104/// If the next value to probe is within the PROBE_THRESHOLD bytes of
105/// the current Path MTU, probing will be considered complete.
106const PROBE_THRESHOLD: u16 = 20;
107
108/// When the black_hole_counter exceeds this threshold, on_black_hole_detected will be
109/// called to reduce the MTU to the BASE_PLPMTU. The black_hole_counter is incremented when
110/// a burst of consecutive packets is lost that starts with a packet that is:
111/// 1) not an MTU probe
112/// 2) larger than the BASE_PLPMTU
113/// 3) sent after the largest MTU-sized acknowledged packet number
114/// This is a possible indication that the path cannot support the MTU that was previously confirmed.
115const BLACK_HOLE_THRESHOLD: u8 = 3;
116
117/// After a black hole has been detected, the mtu::Controller will wait this duration
118/// before probing for a larger MTU again.
119const BLACK_HOLE_COOL_OFF_DURATION: Duration = Duration::from_secs(60);
120
121//= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.1
122//# The PMTU_RAISE_TIMER is configured to the period a
123//# sender will continue to use the current PLPMTU, after which it
124//# reenters the Search Phase. This timer has a period of 600
125//# seconds, as recommended by PLPMTUD [RFC4821].
126const PMTU_RAISE_TIMER_DURATION: Duration = Duration::from_secs(600);
127
128//= https://www.rfc-editor.org/rfc/rfc9000#section-14
129//# QUIC MUST NOT be used if the network path cannot support a
130//# maximum datagram size of at least 1200 bytes.
131
132//= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.2
133//# When using IPv4, there is no currently equivalent size specified,
134//# and a default BASE_PLPMTU of 1200 bytes is RECOMMENDED.
135pub const MINIMUM_MAX_DATAGRAM_SIZE: u16 = 1200;
136
137// Length is the length in octets of this user datagram including this
138// header and the data. (This means the minimum value of the length is
139// eight.)
140// See https://www.rfc-editor.org/rfc/rfc768.txt
141const UDP_HEADER_LEN: u16 = 8;
142
143// IPv4 header ranges from 20-60 bytes, depending on Options
144const IPV4_MIN_HEADER_LEN: u16 = 20;
145// IPv6 header is always 40 bytes, plus extensions
146const IPV6_MIN_HEADER_LEN: u16 = 40;
147
148// The minimum allowed Max MTU is the minimum UDP datagram size of 1200 bytes plus
149// the UDP header length and minimal IP header length
150const fn const_min(a: u16, b: u16) -> u16 {
151 if a < b {
152 a
153 } else {
154 b
155 }
156}
157
158const MINIMUM_MTU: u16 = MINIMUM_MAX_DATAGRAM_SIZE
159 + UDP_HEADER_LEN
160 + const_min(IPV4_MIN_HEADER_LEN, IPV6_MIN_HEADER_LEN);
161
162macro_rules! impl_mtu {
163 ($name:ident, $default:expr) => {
164 #[derive(Clone, Copy, Debug, PartialEq)]
165 pub struct $name(NonZeroU16);
166
167 impl $name {
168 /// The minimum value required for path MTU
169 pub const MIN: Self = Self(unsafe { NonZeroU16::new_unchecked(MINIMUM_MTU) });
170
171 /// The largest size of a QUIC datagram that can be sent on a path that supports this
172 /// MTU. This does not include the size of UDP and IP headers.
173 #[inline]
174 pub fn max_datagram_size(&self, peer_socket_address: &inet::SocketAddress) -> u16 {
175 let min_ip_header_len = match peer_socket_address {
176 inet::SocketAddress::IpV4(_) => IPV4_MIN_HEADER_LEN,
177 inet::SocketAddress::IpV6(_) => IPV6_MIN_HEADER_LEN,
178 };
179 (u16::from(*self) - UDP_HEADER_LEN - min_ip_header_len)
180 .max(MINIMUM_MAX_DATAGRAM_SIZE)
181 }
182 }
183
184 impl Default for $name {
185 #[inline]
186 fn default() -> Self {
187 $default
188 }
189 }
190
191 impl TryFrom<u16> for $name {
192 type Error = MtuError;
193
194 fn try_from(value: u16) -> Result<Self, Self::Error> {
195 if value < MINIMUM_MTU {
196 return Err(MtuError);
197 }
198
199 Ok($name(value.try_into().expect(
200 "Value must be greater than zero according to the check above",
201 )))
202 }
203 }
204
205 impl From<$name> for usize {
206 #[inline]
207 fn from(value: $name) -> Self {
208 value.0.get() as usize
209 }
210 }
211
212 impl From<$name> for u16 {
213 #[inline]
214 fn from(value: $name) -> Self {
215 value.0.get()
216 }
217 }
218 };
219}
220
221// Safety: 1500 and MINIMUM_MTU are greater than zero
222const DEFAULT_MAX_MTU: MaxMtu = MaxMtu(NonZeroU16::new(1500).unwrap());
223const DEFAULT_BASE_MTU: BaseMtu = BaseMtu(NonZeroU16::new(MINIMUM_MTU).unwrap());
224const DEFAULT_INITIAL_MTU: InitialMtu = InitialMtu(NonZeroU16::new(MINIMUM_MTU).unwrap());
225
226impl_mtu!(MaxMtu, DEFAULT_MAX_MTU);
227impl_mtu!(InitialMtu, DEFAULT_INITIAL_MTU);
228impl_mtu!(BaseMtu, DEFAULT_BASE_MTU);
229
230#[derive(Debug, Eq, PartialEq)]
231pub struct MtuError;
232
233impl Display for MtuError {
234 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
235 write!(
236 f,
237 "MTU must have {} <= base_mtu (default: {}) <= initial_mtu (default: {}) <= max_mtu (default: {})",
238 MINIMUM_MTU, DEFAULT_BASE_MTU.0, DEFAULT_INITIAL_MTU.0, DEFAULT_MAX_MTU.0
239 )
240 }
241}
242
243impl core::error::Error for MtuError {}
244
245/// Information about the path that may be used when generating MTU configuration.
246#[non_exhaustive]
247pub struct PathInfo<'a> {
248 pub remote_address: event::api::SocketAddress<'a>,
249}
250
251impl<'a> PathInfo<'a> {
252 #[inline]
253 #[doc(hidden)]
254 pub fn new(remote_address: &'a inet::SocketAddress) -> Self {
255 PathInfo {
256 remote_address: remote_address.into_event(),
257 }
258 }
259}
260
261/// MTU configuration manager.
262#[derive(Debug)]
263pub struct Manager<E: mtu::Endpoint> {
264 provider: E,
265 endpoint_mtu_config: Config,
266}
267
268impl<E: mtu::Endpoint> Manager<E> {
269 pub fn new(provider: E) -> Self {
270 Manager {
271 provider,
272 // Instantiate the Manager with default values since the endpoint is
273 // created before the IO provider (IO provider sets the actual config `set_mtu_config()`).
274 endpoint_mtu_config: Default::default(),
275 }
276 }
277
278 pub fn config(&mut self, remote_address: &inet::SocketAddress) -> Result<Config, MtuError> {
279 let info = mtu::PathInfo::new(remote_address);
280 if let Some(conn_config) = self.provider.on_path(&info, self.endpoint_mtu_config) {
281 ensure!(conn_config.is_valid(), Err(MtuError));
282 ensure!(
283 u16::from(conn_config.max_mtu) <= u16::from(self.endpoint_mtu_config.max_mtu()),
284 Err(MtuError)
285 );
286
287 Ok(conn_config)
288 } else {
289 Ok(self.endpoint_mtu_config)
290 }
291 }
292
293 pub fn set_endpoint_config(&mut self, config: Config) {
294 self.endpoint_mtu_config = config;
295 }
296
297 pub fn endpoint_config(&self) -> &Config {
298 &self.endpoint_mtu_config
299 }
300}
301
302/// Specify MTU configuration for the given path.
303pub trait Endpoint: 'static + Send {
304 /// Provide path specific MTU config.
305 ///
306 /// The MTU provider is invoked for each new path established during a
307 /// connection. Returning `None` means that the path should inherit
308 /// the endpoint configured values.
309 ///
310 /// Application must ensure that `max_mtu <= endpoint_mtu_config.max_mtu()`.
311 fn on_path(&mut self, info: &mtu::PathInfo, endpoint_mtu_config: Config)
312 -> Option<mtu::Config>;
313}
314
315/// Inherit the endpoint configured values.
316#[derive(Debug, Default)]
317pub struct Inherit {}
318
319impl Endpoint for Inherit {
320 fn on_path(
321 &mut self,
322 _info: &mtu::PathInfo,
323 _endpoint_mtu_config: Config,
324 ) -> Option<mtu::Config> {
325 None
326 }
327}
328
329/// MTU configuration.
330#[derive(Copy, Clone, Debug, Default)]
331pub struct Config {
332 initial_mtu: InitialMtu,
333 base_mtu: BaseMtu,
334 max_mtu: MaxMtu,
335}
336
337impl Endpoint for Config {
338 fn on_path(
339 &mut self,
340 _info: &mtu::PathInfo,
341 _endpoint_mtu_config: Config,
342 ) -> Option<mtu::Config> {
343 Some(*self)
344 }
345}
346
347impl Config {
348 pub const MIN: Self = Self {
349 initial_mtu: InitialMtu::MIN,
350 base_mtu: BaseMtu::MIN,
351 max_mtu: MaxMtu::MIN,
352 };
353
354 pub fn builder() -> Builder {
355 Builder::default()
356 }
357
358 /// The maximum transmission unit (MTU) to use when initiating a connection.
359 pub fn initial_mtu(&self) -> InitialMtu {
360 self.initial_mtu
361 }
362
363 /// The smallest maximum transmission unit (MTU) to use when transmitting.
364 pub fn base_mtu(&self) -> BaseMtu {
365 self.base_mtu
366 }
367
368 /// The largest maximum transmission unit (MTU) that can be sent on a path.
369 pub fn max_mtu(&self) -> MaxMtu {
370 self.max_mtu
371 }
372
373 /// Returns true if the MTU configuration is valid
374 ///
375 /// A valid MTU configuration must have base_mtu <= initial_mtu <= max_mtu
376 #[inline]
377 pub fn is_valid(&self) -> bool {
378 self.base_mtu.0 <= self.initial_mtu.0 && self.initial_mtu.0 <= self.max_mtu.0
379 }
380}
381
382#[derive(Debug, Default)]
383pub struct Builder {
384 initial_mtu: Option<InitialMtu>,
385 base_mtu: Option<BaseMtu>,
386 max_mtu: Option<MaxMtu>,
387}
388
389impl Builder {
390 /// Sets the maximum transmission unit (MTU) to use when initiating a connection (default: 1228)
391 ///
392 /// For a detailed description see the [with_initial_mtu] documentation in the IO provider.
393 ///
394 /// [with_initial_mtu]: https://docs.rs/s2n-quic/latest/s2n_quic/provider/io/tokio/struct.Builder.html#method.with_initial_mtu
395 pub fn with_initial_mtu(mut self, initial_mtu: u16) -> Result<Self, MtuError> {
396 if let Some(base_mtu) = self.base_mtu {
397 ensure!(initial_mtu >= base_mtu.0.get(), Err(MtuError));
398 }
399
400 if let Some(max_mtu) = self.max_mtu {
401 ensure!(initial_mtu <= max_mtu.0.get(), Err(MtuError));
402 }
403
404 self.initial_mtu = Some(initial_mtu.try_into()?);
405 Ok(self)
406 }
407
408 /// Sets the largest maximum transmission unit (MTU) that can be sent on a path (default: 1500)
409 ///
410 /// For a detailed description see the [with_base_mtu] documentation in the IO provider.
411 ///
412 /// [with_base_mtu]: https://docs.rs/s2n-quic/latest/s2n_quic/provider/io/tokio/struct.Builder.html#method.with_base_mtu
413 pub fn with_base_mtu(mut self, base_mtu: u16) -> Result<Self, MtuError> {
414 if let Some(initial_mtu) = self.initial_mtu {
415 ensure!(initial_mtu.0.get() >= base_mtu, Err(MtuError));
416 }
417
418 if let Some(max_mtu) = self.max_mtu {
419 ensure!(base_mtu <= max_mtu.0.get(), Err(MtuError));
420 }
421
422 self.base_mtu = Some(base_mtu.try_into()?);
423 Ok(self)
424 }
425
426 /// Sets the largest maximum transmission unit (MTU) that can be sent on a path (default: 1500)
427 ///
428 /// Application must ensure that max_mtu <= endpoint_mtu_config.max_mtu(). For a detailed
429 /// description see the [with_max_mtu] documentation in the IO provider.
430 ///
431 /// [with_max_mtu]: https://docs.rs/s2n-quic/latest/s2n_quic/provider/io/tokio/struct.Builder.html#method.with_max_mtu
432 pub fn with_max_mtu(mut self, max_mtu: u16) -> Result<Self, MtuError> {
433 if let Some(initial_mtu) = self.initial_mtu {
434 ensure!(initial_mtu.0.get() <= max_mtu, Err(MtuError));
435 }
436
437 if let Some(base_mtu) = self.base_mtu {
438 ensure!(base_mtu.0.get() <= max_mtu, Err(MtuError));
439 }
440
441 self.max_mtu = Some(max_mtu.try_into()?);
442 Ok(self)
443 }
444
445 pub fn build(self) -> Result<Config, MtuError> {
446 let base_mtu = self.base_mtu.unwrap_or_default();
447 let max_mtu = self.max_mtu.unwrap_or_default();
448 let mut initial_mtu = self.initial_mtu.unwrap_or_default();
449
450 if self.initial_mtu.is_none() {
451 // The initial_mtu was not configured, so adjust the value from the default
452 // based on the default or configured base and max MTUs
453 initial_mtu = initial_mtu
454 .0
455 .max(base_mtu.0)
456 .min(max_mtu.0)
457 .get()
458 .try_into()?
459 };
460
461 let config = Config {
462 initial_mtu,
463 max_mtu,
464 base_mtu,
465 };
466
467 ensure!(config.is_valid(), Err(MtuError));
468 Ok(config)
469 }
470}
471
472#[derive(Eq, PartialEq, Debug)]
473pub enum MtuResult {
474 NoChange,
475 MtuUpdated(u16),
476}
477
478#[derive(Clone, Debug)]
479pub struct Controller {
480 state: State,
481 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.2
482 //# The BASE_PLPMTU is a configured size expected to work for most paths.
483 //# The size is equal to or larger than the MIN_PLPMTU and smaller than
484 //# the MAX_PLPMTU.
485 base_plpmtu: u16,
486 //= https://www.rfc-editor.org/rfc/rfc8899#section-2
487 //# The Packetization Layer PMTU is an estimate of the largest size
488 //# of PL datagram that can be sent by a path, controlled by PLPMTUD
489 plpmtu: u16,
490 /// The maximum size the UDP payload can reach for any probe packet.
491 max_udp_payload: u16,
492 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.3
493 //# The PROBED_SIZE is the size of the current probe packet
494 //# as determined at the PL. This is a tentative value for the
495 //# PLPMTU, which is awaiting confirmation by an acknowledgment.
496 probed_size: u16,
497 /// The maximum size datagram to probe for. In contrast to the max_udp_payload,
498 /// this value will decrease if probes are not acknowledged.
499 max_probe_size: u16,
500 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.3
501 //# The PROBE_COUNT is a count of the number of successive
502 //# unsuccessful probe packets that have been sent.
503 probe_count: u8,
504 /// A count of the number of packets with a size > base_plpmtu lost since
505 /// the last time a packet with size equal to the current MTU was acknowledged.
506 black_hole_counter: Counter<u8, Saturating>,
507 /// The largest acknowledged packet with size >= the plpmtu. Used when tracking
508 /// packets that have been lost for the purpose of detecting a black hole.
509 largest_acked_mtu_sized_packet: Option<PacketNumber>,
510 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.1.1
511 //# The PMTU_RAISE_TIMER is configured to the period a
512 //# sender will continue to use the current PLPMTU, after which it
513 //# reenters the Search Phase.
514 pmtu_raise_timer: Timer,
515 /// Flag indicating we need to send MtuProbingComplete frame to the peer
516 needs_to_send_completion: bool,
517 /// Flag indicating MtuProbingComplete frame is enabled
518 mtu_probing_complete_support: bool,
519}
520
521impl Controller {
522 /// Construct a new mtu::Controller with the given `max_mtu` and `peer_socket_address`
523 ///
524 /// The UDP header length and IP header length will be subtracted from `max_mtu` to
525 /// determine the max_udp_payload used for limiting the payload length of probe packets.
526 /// max_mtu is the maximum allowed mtu, e.g. for jumbo frames this value is expected to
527 /// be over 9000.
528 #[inline]
529 pub fn new(config: Config, peer_socket_address: &inet::SocketAddress) -> Self {
530 debug_assert!(config.is_valid(), "Invalid MTU configuration {config:?}");
531
532 //= https://www.rfc-editor.org/rfc/rfc9000#section-14.3
533 //# Endpoints SHOULD set the initial value of BASE_PLPMTU (Section 5.1 of
534 //# [DPLPMTUD]) to be consistent with QUIC's smallest allowed maximum
535 //# datagram size.
536 let base_plpmtu = config.base_mtu.max_datagram_size(peer_socket_address);
537 let max_udp_payload = config.max_mtu.max_datagram_size(peer_socket_address);
538
539 //= https://www.rfc-editor.org/rfc/rfc9000#section-14.1
540 //# Datagrams containing Initial packets MAY exceed 1200 bytes if the sender
541 //# believes that the network path and peer both support the size that it chooses.
542 let plpmtu = config.initial_mtu.max_datagram_size(peer_socket_address);
543
544 let initial_probed_size = if u16::from(config.initial_mtu) > ETHERNET_MTU - PROBE_THRESHOLD
545 {
546 // An initial MTU was provided within the probe threshold of the Ethernet MTU, so we can
547 // instead try probing for an MTU larger than the Ethernet MTU
548 Self::next_probe_size(plpmtu, max_udp_payload)
549 } else {
550 // The UDP payload size for the most likely MTU is based on standard Ethernet MTU minus
551 // the minimum length IP headers (without IPv4 options or IPv6 extensions) and UPD header
552 let min_ip_header_len = match peer_socket_address {
553 inet::SocketAddress::IpV4(_) => IPV4_MIN_HEADER_LEN,
554 inet::SocketAddress::IpV6(_) => IPV6_MIN_HEADER_LEN,
555 };
556 ETHERNET_MTU - UDP_HEADER_LEN - min_ip_header_len
557 }
558 .min(max_udp_payload);
559
560 let state = if plpmtu > base_plpmtu {
561 // The initial MTU has been configured higher than the base MTU
562 State::EarlySearchRequested
563 } else if initial_probed_size - base_plpmtu < PROBE_THRESHOLD {
564 // The next probe size is within the probe threshold of the
565 // base MTU, so no probing will occur and the search is complete
566 State::SearchComplete
567 } else {
568 // Otherwise wait for regular MTU probing to be enabled
569 State::Disabled
570 };
571
572 Self {
573 state,
574 base_plpmtu,
575 plpmtu,
576 probed_size: initial_probed_size,
577 max_udp_payload,
578 max_probe_size: max_udp_payload,
579 probe_count: 0,
580 black_hole_counter: Default::default(),
581 largest_acked_mtu_sized_packet: None,
582 pmtu_raise_timer: Timer::default(),
583 needs_to_send_completion: false,
584 mtu_probing_complete_support: false,
585 }
586 }
587
588 /// Enable MtuProbingComplete Support
589 #[inline]
590 pub fn enable_mtu_probing_complete_support(&mut self) {
591 self.mtu_probing_complete_support = true;
592 }
593
594 /// Enable path MTU probing
595 #[inline]
596 pub fn enable(&mut self) {
597 // ensure we haven't already enabled the controller
598 ensure!(self.state.is_disabled() || self.state.is_early_search_requested());
599
600 // TODO: Look up current MTU in a cache. If there is a cache hit
601 // move directly to SearchComplete and arm the PMTU raise timer.
602 // Otherwise, start searching for a larger PMTU immediately
603 self.request_new_search(None);
604 }
605
606 /// Called when the connection timer expires
607 #[inline]
608 pub fn on_timeout(&mut self, now: Timestamp) {
609 ensure!(self.pmtu_raise_timer.poll_expiration(now).is_ready());
610 self.request_new_search(None);
611 }
612
613 //= https://www.rfc-editor.org/rfc/rfc8899#section-4.2
614 //# When
615 //# supported, this mechanism MAY also be used by DPLPMTUD to acknowledge
616 //# reception of a probe packet.
617 /// This method gets called when a packet delivery got acknowledged
618 #[inline]
619 pub fn on_packet_ack<CC: CongestionController, Pub: event::ConnectionPublisher>(
620 &mut self,
621 packet_number: PacketNumber,
622 sent_bytes: u16,
623 congestion_controller: &mut CC,
624 path_id: path::Id,
625 publisher: &mut Pub,
626 ) -> MtuResult {
627 if self.state.is_early_search_requested() && sent_bytes > self.base_plpmtu {
628 if self.is_next_probe_size_above_threshold() {
629 // Early probing has succeeded, but the max MTU is higher still so
630 // wait for regular MTU probing to be enabled to attempt higher MTUs
631 self.state = State::Disabled;
632 } else {
633 self.state = State::SearchComplete;
634 }
635
636 // Publish an `on_mtu_updated` event since the cause
637 // and possibly search_complete status have changed
638 publisher.on_mtu_updated(event::builder::MtuUpdated {
639 path_id: path_id.into_event(),
640 mtu: self.plpmtu,
641 cause: MtuUpdatedCause::InitialMtuPacketAcknowledged,
642 search_complete: self.state.is_search_complete(),
643 });
644 }
645
646 // no need to process anything in the disabled state
647 ensure!(self.state != State::Disabled, MtuResult::NoChange);
648
649 // MTU probes are only sent in application data space
650 ensure!(
651 packet_number.space().is_application_data(),
652 MtuResult::NoChange
653 );
654
655 if sent_bytes >= self.plpmtu
656 && self
657 .largest_acked_mtu_sized_packet
658 .is_none_or(|pn| packet_number > pn)
659 {
660 // Reset the black hole counter since a packet the size of the current MTU or larger
661 // has been acknowledged, indicating the path can still support the current MTU
662 self.black_hole_counter = Default::default();
663 self.largest_acked_mtu_sized_packet = Some(packet_number);
664 }
665
666 if let State::Searching(probe_packet_number, transmit_time) = self.state {
667 if packet_number == probe_packet_number {
668 self.plpmtu = self.probed_size;
669 // A new MTU has been confirmed, notify the congestion controller
670 congestion_controller.on_mtu_update(
671 self.plpmtu,
672 &mut congestion_controller::PathPublisher::new(publisher, path_id),
673 );
674
675 self.update_probed_size();
676
677 //= https://www.rfc-editor.org/rfc/rfc8899#section-8
678 //# To avoid excessive load, the interval between individual probe
679 //# packets MUST be at least one RTT, and the interval between rounds of
680 //# probing is determined by the PMTU_RAISE_TIMER.
681
682 // Subsequent probe packets are sent based on the round trip transmission and
683 // acknowledgement/loss of a packet, so the interval will be at least 1 RTT.
684 self.request_new_search(Some(transmit_time));
685
686 publisher.on_mtu_updated(event::builder::MtuUpdated {
687 path_id: path_id.into_event(),
688 mtu: self.plpmtu,
689 cause: MtuUpdatedCause::ProbeAcknowledged,
690 search_complete: self.state.is_search_complete(),
691 });
692
693 return MtuResult::MtuUpdated(self.plpmtu);
694 }
695 }
696
697 MtuResult::NoChange
698 }
699
700 //= https://www.rfc-editor.org/rfc/rfc8899#section-3
701 //# The PL is REQUIRED to be
702 //# robust in the case where probe packets are lost due to other
703 //# reasons (including link transmission error, congestion).
704 /// This method gets called when a packet loss is reported
705 #[inline]
706 pub fn on_packet_loss<CC: CongestionController, Pub: event::ConnectionPublisher>(
707 &mut self,
708 packet_number: PacketNumber,
709 lost_bytes: u16,
710 new_loss_burst: bool,
711 now: Timestamp,
712 congestion_controller: &mut CC,
713 path_id: path::Id,
714 publisher: &mut Pub,
715 ) -> MtuResult {
716 // MTU probes are only sent in the application data space, but since early packet
717 // spaces will use the `InitialMtu` prior to MTU probing being enabled, we need
718 // to check for potentially MTU-related packet loss if an early search has been requested
719 ensure!(
720 self.state.is_early_search_requested() || packet_number.space().is_application_data(),
721 MtuResult::NoChange
722 );
723
724 match &self.state {
725 State::Disabled => {}
726 State::EarlySearchRequested => {
727 // MTU probing hasn't been enabled yet, but since the initial MTU was configured
728 // higher than the base PLPMTU and this setting resulted in a lost packet
729 // we drop back down to the base PLPMTU.
730 self.plpmtu = self.base_plpmtu;
731
732 congestion_controller.on_mtu_update(
733 self.plpmtu,
734 &mut congestion_controller::PathPublisher::new(publisher, path_id),
735 );
736
737 if self.is_next_probe_size_above_threshold() {
738 // Resume regular probing when the MTU controller is enabled
739 self.state = State::Disabled;
740 } else {
741 // The next probe is within the threshold, so move directly
742 // to the SearchComplete state
743 self.state = State::SearchComplete;
744 }
745
746 publisher.on_mtu_updated(event::builder::MtuUpdated {
747 path_id: path_id.into_event(),
748 mtu: self.plpmtu,
749 cause: MtuUpdatedCause::InitialMtuPacketLost,
750 search_complete: self.state.is_search_complete(),
751 });
752
753 return MtuResult::MtuUpdated(self.plpmtu);
754 }
755 State::Searching(probe_pn, _) if *probe_pn == packet_number => {
756 // The MTU probe was lost
757 if self.probe_count == MAX_PROBES {
758 // We've sent MAX_PROBES without acknowledgement, so
759 // attempt a smaller probe size
760 self.max_probe_size = self.probed_size;
761 self.update_probed_size();
762 self.request_new_search(None);
763
764 if self.is_search_completed() {
765 // Emit an on_mtu_updated event as the search has now completed
766 publisher.on_mtu_updated(event::builder::MtuUpdated {
767 path_id: path_id.into_event(),
768 mtu: self.plpmtu,
769 cause: MtuUpdatedCause::LargerProbesLost,
770 search_complete: true,
771 })
772 }
773 } else {
774 // Try the same probe size again
775 self.state = State::SearchRequested
776 }
777 }
778 State::Searching(_, _) | State::SearchComplete | State::SearchRequested => {
779 if (self.base_plpmtu + 1..=self.plpmtu).contains(&lost_bytes)
780 && self
781 .largest_acked_mtu_sized_packet
782 .is_none_or(|pn| packet_number > pn)
783 && new_loss_burst
784 {
785 // A non-probe packet larger than the BASE_PLPMTU that was sent after the last
786 // acknowledged MTU-sized packet has been lost
787 self.black_hole_counter += 1;
788 }
789
790 if self.black_hole_counter > BLACK_HOLE_THRESHOLD {
791 return self.on_black_hole_detected(
792 now,
793 congestion_controller,
794 path_id,
795 publisher,
796 );
797 }
798 }
799 }
800
801 MtuResult::NoChange
802 }
803
804 /// Gets the currently validated maximum QUIC datagram size
805 ///
806 /// This does not include the size of UDP and IP headers.
807 #[inline]
808 pub fn max_datagram_size(&self) -> usize {
809 self.plpmtu as usize
810 }
811
812 /// Gets the max datagram size currently being probed for
813 #[inline]
814 pub fn probed_sized(&self) -> usize {
815 self.probed_size as usize
816 }
817
818 /// Returns true if probing for the MTU has completed
819 pub fn is_search_completed(&self) -> bool {
820 self.state.is_search_complete()
821 }
822
823 /// Sets `probed_size` to the next MTU size to probe for based on a binary search
824 #[inline]
825 fn update_probed_size(&mut self) {
826 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.3.2
827 //# Implementations SHOULD select the set of probe packet sizes to
828 //# maximize the gain in PLPMTU from each search step.
829 self.probed_size = Self::next_probe_size(self.plpmtu, self.max_probe_size);
830 }
831
832 /// Calculates the next probe size as halfway from the current to the max size
833 #[inline]
834 fn next_probe_size(current: u16, max: u16) -> u16 {
835 current + ((max - current) / 2)
836 }
837
838 #[inline]
839 fn is_next_probe_size_above_threshold(&self) -> bool {
840 self.probed_size - self.plpmtu >= PROBE_THRESHOLD
841 }
842
843 /// Requests a new search to be initiated
844 ///
845 /// If `last_probe_time` is supplied, the PMTU Raise Timer will be armed as
846 /// necessary if the probed_size is already within the PROBE_THRESHOLD
847 /// of the current PLPMTU
848 #[inline]
849 fn request_new_search(&mut self, last_probe_time: Option<Timestamp>) {
850 if self.is_next_probe_size_above_threshold() {
851 self.probe_count = 0;
852 self.state = State::SearchRequested;
853 } else {
854 // The next probe size is within the threshold of the current MTU
855 // so its not worth additional probing.
856 self.state = State::SearchComplete;
857
858 // Mark that we need to send the MtuProbingComplete frame to the peer.
859 // MtuProbingComplete is not a IETF QUIC frame, so we only send it if dcQUIC is enabled.
860 if !self.needs_to_send_completion && self.mtu_probing_complete_support {
861 self.needs_to_send_completion = true;
862 }
863
864 if let Some(last_probe_time) = last_probe_time {
865 self.arm_pmtu_raise_timer(last_probe_time + PMTU_RAISE_TIMER_DURATION);
866 }
867 }
868 }
869
870 /// Called when an excessive number of packets larger than the BASE_PLPMTU have been lost
871 #[inline]
872 fn on_black_hole_detected<CC: CongestionController, Pub: event::ConnectionPublisher>(
873 &mut self,
874 now: Timestamp,
875 congestion_controller: &mut CC,
876 path_id: path::Id,
877 publisher: &mut Pub,
878 ) -> MtuResult {
879 self.black_hole_counter = Default::default();
880 self.largest_acked_mtu_sized_packet = None;
881 // Reset the plpmtu back to the base_plpmtu and notify the congestion controller
882 self.plpmtu = self.base_plpmtu;
883 congestion_controller.on_mtu_update(
884 self.plpmtu,
885 &mut congestion_controller::PathPublisher::new(publisher, path_id),
886 );
887 // Cancel any current probes
888 self.state = State::SearchComplete;
889 // Arm the PMTU raise timer to try a larger MTU again after a cooling off period
890 self.arm_pmtu_raise_timer(now + BLACK_HOLE_COOL_OFF_DURATION);
891
892 publisher.on_mtu_updated(event::builder::MtuUpdated {
893 path_id: path_id.into_event(),
894 mtu: self.plpmtu,
895 cause: MtuUpdatedCause::Blackhole,
896 search_complete: self.state.is_search_complete(),
897 });
898
899 MtuResult::MtuUpdated(self.plpmtu)
900 }
901
902 /// Arm the PMTU Raise Timer if there is still room to increase the
903 /// MTU before hitting the max plpmtu
904 #[inline]
905 fn arm_pmtu_raise_timer(&mut self, timestamp: Timestamp) {
906 // Reset the max_probe_size to the max_udp_payload to allow for larger probe sizes
907 self.max_probe_size = self.max_udp_payload;
908 self.update_probed_size();
909
910 if self.is_next_probe_size_above_threshold() {
911 // There is still some room to try a larger MTU again,
912 // so arm the pmtu raise timer
913 self.pmtu_raise_timer.set(timestamp);
914 }
915 }
916}
917
918impl timer::Provider for Controller {
919 #[inline]
920 fn timers<Q: timer::Query>(&self, query: &mut Q) -> timer::Result {
921 self.pmtu_raise_timer.timers(query)?;
922
923 Ok(())
924 }
925}
926
927impl transmission::Provider for Controller {
928 /// Queries the component for any outgoing frames that need to get sent
929 ///
930 /// This method assumes that no other data (other than the packet header) has been written
931 /// to the supplied `WriteContext`. This necessitates the caller ensuring the probe packet
932 /// written by this method to be in its own connection transmission.
933 #[inline]
934 fn on_transmit<W: transmission::Writer>(&mut self, context: &mut W) {
935 // Send MtuProbingComplete frame if needed and DC is enabled
936 if self.needs_to_send_completion {
937 let frame = frame::MtuProbingComplete::new(self.plpmtu);
938 if context.write_frame(&frame).is_some() {
939 self.needs_to_send_completion = false;
940 }
941 return;
942 }
943
944 //= https://www.rfc-editor.org/rfc/rfc8899#section-5.2
945 //# When used with an acknowledged PL (e.g., SCTP), DPLPMTUD SHOULD NOT continue to
946 //# generate PLPMTU probes in this state.
947 ensure!(self.state == State::SearchRequested);
948
949 ensure!(context.transmission_mode().is_mtu_probing());
950
951 // Each packet contains overhead in the form of a packet header and an authentication tag.
952 // This overhead contributes to the overall size of the packet, so the payload we write
953 // to the packet will account for this overhead to reach the target probed size.
954 let probe_payload_size =
955 self.probed_size as usize - context.header_len() - context.tag_len();
956
957 if context.remaining_capacity() < probe_payload_size {
958 // There isn't enough capacity in the buffer to write the datagram we
959 // want to probe, so we've reached the maximum pmtu and the search is complete.
960 self.state = State::SearchComplete;
961 return;
962 }
963
964 //= https://www.rfc-editor.org/rfc/rfc9000#section-14.4
965 //# Endpoints could limit the content of PMTU probes to PING and PADDING
966 //# frames, since packets that are larger than the current maximum
967 //# datagram size are more likely to be dropped by the network.
968
969 //= https://www.rfc-editor.org/rfc/rfc8899#section-3
970 //# Probe loss recovery: It is RECOMMENDED to use probe packets that
971 //# do not carry any user data that would require retransmission if
972 //# lost.
973
974 //= https://www.rfc-editor.org/rfc/rfc8899#section-4.1
975 //# DPLPMTUD MAY choose to use only one of these methods to simplify the
976 //# implementation.
977
978 context.write_frame(&frame::Ping);
979 let padding_size = probe_payload_size - frame::Ping.encoding_size();
980 if let Some(packet_number) = context.write_frame(&frame::Padding {
981 length: padding_size,
982 }) {
983 self.probe_count += 1;
984 self.state = State::Searching(packet_number, context.current_time());
985 }
986 }
987}
988
989impl transmission::interest::Provider for Controller {
990 #[inline]
991 fn transmission_interest<Q: transmission::interest::Query>(
992 &self,
993 query: &mut Q,
994 ) -> transmission::interest::Result {
995 match self.state {
996 State::SearchRequested => query.on_new_data()?,
997 State::SearchComplete => {
998 // Indicate interest if we need to send the MtuProbingComplete frame
999 if self.needs_to_send_completion {
1000 query.on_new_data()?
1001 }
1002 }
1003 _ => {}
1004 }
1005
1006 Ok(())
1007 }
1008}