s2n_quic_core/recovery/cubic.rs
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5 counter::Counter,
6 event::builder::SlowStartExitCause,
7 random,
8 recovery::{
9 congestion_controller::{self, CongestionController, Publisher},
10 cubic::{FastRetransmission::*, State::*},
11 hybrid_slow_start::HybridSlowStart,
12 pacing::Pacer,
13 RttEstimator,
14 },
15 time::Timestamp,
16};
17use core::{
18 cmp::{max, min},
19 time::Duration,
20};
21#[cfg(not(feature = "std"))]
22use num_traits::Float as _;
23
24//= https://www.rfc-editor.org/rfc/rfc9002#section-7.3
25//# New Path or +------------+
26//# persistent congestion | Slow |
27//# (O)---------------------->| Start |
28//# +------------+
29//# |
30//# Loss or |
31//# ECN-CE increase |
32//# v
33//# +------------+ Loss or +------------+
34//# | Congestion | ECN-CE increase | Recovery |
35//# | Avoidance |------------------>| Period |
36//# +------------+ +------------+
37//# ^ |
38//# | |
39//# +----------------------------+
40//# Acknowledgment of packet
41//# sent during recovery
42// This implementation uses Hybrid Slow Start, which allows for
43// Slow Start to exit directly to Congestion Avoidance.
44#[derive(Clone, Debug, PartialEq, Eq)]
45enum State {
46 SlowStart,
47 Recovery(Timestamp, FastRetransmission),
48 CongestionAvoidance(CongestionAvoidanceTiming),
49}
50
51impl State {
52 /// Returns State::CongestionAvoidance initialized with the given `start_time`
53 fn congestion_avoidance(start_time: Timestamp) -> Self {
54 Self::CongestionAvoidance(CongestionAvoidanceTiming {
55 start_time,
56 window_increase_time: start_time,
57 app_limited_time: None,
58 })
59 }
60
61 /// Called when app limited after sending has completed for a round and an ACK has been received.
62 fn on_app_limited(&mut self, timestamp: Timestamp) {
63 if let CongestionAvoidance(ref mut timing) = self {
64 debug_assert!(
65 timing
66 .app_limited_time
67 .is_none_or(|app_limited_time| timestamp >= app_limited_time),
68 "timestamp must be monotonically increasing"
69 );
70 debug_assert!(
71 timestamp >= timing.window_increase_time,
72 "timestamp must not be before the window was last increased"
73 );
74
75 timing.app_limited_time = Some(timestamp);
76 }
77 }
78
79 /// Returns true if the state is `SlowStart`
80 fn is_slow_start(&self) -> bool {
81 matches!(self, SlowStart)
82 }
83}
84
85//= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
86//# If the congestion window is reduced immediately, a
87//# single packet can be sent prior to reduction. This speeds up loss
88//# recovery if the data in the lost packet is retransmitted and is
89//# similar to TCP as described in Section 5 of [RFC6675].
90#[derive(Clone, Debug, PartialEq, Eq)]
91enum FastRetransmission {
92 Idle,
93 RequiresTransmission,
94}
95
96#[derive(Clone, Debug, PartialEq, Eq)]
97struct CongestionAvoidanceTiming {
98 // The time the congestion avoidance state was entered
99 start_time: Timestamp,
100 // The time the congestion window was last increased
101 window_increase_time: Timestamp,
102 // The last time the current congestion window was underutilized
103 app_limited_time: Option<Timestamp>,
104}
105
106impl CongestionAvoidanceTiming {
107 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
108 //# t is the elapsed time from the beginning of the current congestion avoidance
109 fn t(&self, timestamp: Timestamp) -> Duration {
110 timestamp - self.start_time
111 }
112
113 /// Called when the congestion window is being increased.
114 ///
115 /// Adjusts the start time by the period from the last window increase to the app limited time
116 /// to avoid counting the app limited time period in W_cubic.
117 fn on_window_increase(&mut self, timestamp: Timestamp) {
118 if let Some(app_limited_time) = self.app_limited_time.take() {
119 //= https://www.rfc-editor.org/rfc/rfc8312#section-5.8
120 //# CUBIC does not raise its congestion window size if the flow is
121 //# currently limited by the application instead of the congestion
122 //# window. In case of long periods when cwnd has not been updated due
123 //# to the application rate limit, such as idle periods, t in Eq. 1 MUST
124 //# NOT include these periods; otherwise, W_cubic(t) might be very high
125 //# after restarting from these periods.
126
127 // Adjust the congestion avoidance start time by the app limited duration
128 self.start_time += app_limited_time - self.window_increase_time;
129 }
130
131 self.window_increase_time = timestamp;
132 }
133}
134
135/// A congestion controller that implements "CUBIC for Fast Long-Distance Networks"
136/// as specified in <https://tools.ietf.org/html/rfc8312>. The Hybrid Slow Start algorithm
137/// is used for determining the slow start threshold.
138#[derive(Clone, Debug)]
139pub struct CubicCongestionController {
140 cubic: Cubic,
141 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
142 //# CUBIC MUST employ a slow-start algorithm, when the cwnd is no more
143 //# than ssthresh.
144
145 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
146 //# Among the slow-start algorithms, CUBIC MAY choose the
147 //# standard TCP slow start [RFC5681] in general networks, or the limited
148 //# slow start [RFC3742] or hybrid slow start [HR08] for fast and long-
149 //# distance networks.
150 slow_start: HybridSlowStart,
151 pacer: Pacer,
152 max_datagram_size: u16,
153 congestion_window: f32,
154 state: State,
155 //= https://www.rfc-editor.org/rfc/rfc9002#appendix-B.2
156 //# The sum of the size in bytes of all sent packets
157 //# that contain at least one ack-eliciting or PADDING frame and have
158 //# not been acknowledged or declared lost. The size does not include
159 //# IP or UDP overhead, but does include the QUIC header and
160 //# Authenticated Encryption with Associated Data (AEAD) overhead.
161 //# Packets only containing ACK frames do not count toward
162 //# bytes_in_flight to ensure congestion control does not impede
163 //# congestion feedback.
164
165 // ACK + PADDING packets do not contribute to bytes_in_flight
166 // See https://github.com/aws/s2n-quic/pull/1514
167 bytes_in_flight: BytesInFlight,
168 time_of_last_sent_packet: Option<Timestamp>,
169 under_utilized: bool,
170 // The highest number of bytes in flight seen when an ACK was received,
171 // since the last congestion event.
172 bytes_in_flight_hi: BytesInFlight,
173}
174
175type BytesInFlight = Counter<u32>;
176
177impl CongestionController for CubicCongestionController {
178 type PacketInfo = ();
179
180 #[inline]
181 fn congestion_window(&self) -> u32 {
182 self.congestion_window as u32
183 }
184
185 #[inline]
186 fn bytes_in_flight(&self) -> u32 {
187 *self.bytes_in_flight
188 }
189
190 #[inline]
191 fn is_congestion_limited(&self) -> bool {
192 let available_congestion_window = self
193 .congestion_window()
194 .saturating_sub(*self.bytes_in_flight);
195 available_congestion_window < self.max_datagram_size as u32
196 }
197
198 #[inline]
199 fn requires_fast_retransmission(&self) -> bool {
200 matches!(self.state, Recovery(_, RequiresTransmission))
201 }
202
203 #[inline]
204 fn on_packet_sent<Pub: Publisher>(
205 &mut self,
206 time_sent: Timestamp,
207 bytes_sent: usize,
208 app_limited: Option<bool>,
209 rtt_estimator: &RttEstimator,
210 publisher: &mut Pub,
211 ) {
212 if bytes_sent == 0 {
213 // Packet was not congestion controlled
214 return;
215 }
216
217 self.bytes_in_flight
218 .try_add(bytes_sent)
219 .expect("bytes sent should not exceed u32::MAX");
220
221 if let Some(app_limited) = app_limited {
222 // We check both the given `app_limited` value and is_congestion_window_under_utilized()
223 // as is_congestion_window_under_utilized() is more lenient with respect to the utilization
224 // of the congestion window than the app_limited check. is_congestion_window_under_utilized()
225 // returns true if there are more than 3 MTU's of space left in the cwnd, or less than
226 // half the cwnd is utilized in slow start.
227 self.under_utilized = app_limited && self.is_congestion_window_under_utilized();
228 } else {
229 // We don't externally determine `app_limited` in the Initial and Handshake packet
230 // spaces, so set under_utilized based on is_congestion_window_under_utilized alone
231 self.under_utilized = self.is_congestion_window_under_utilized();
232 }
233
234 if let Recovery(recovery_start_time, RequiresTransmission) = self.state {
235 // A packet has been sent since we entered recovery (fast retransmission)
236 // so flip the state back to idle.
237 self.state = Recovery(recovery_start_time, Idle);
238 }
239
240 self.time_of_last_sent_packet = Some(time_sent);
241
242 self.pacer.on_packet_sent(
243 time_sent,
244 bytes_sent,
245 rtt_estimator,
246 self.congestion_window(),
247 self.max_datagram_size,
248 self.state.is_slow_start(),
249 publisher,
250 );
251 }
252
253 #[inline]
254 fn on_rtt_update<Pub: Publisher>(
255 &mut self,
256 time_sent: Timestamp,
257 now: Timestamp,
258 rtt_estimator: &RttEstimator,
259 publisher: &mut Pub,
260 ) {
261 // Update the Slow Start algorithm each time the RTT
262 // estimate is updated to find the slow start threshold.
263 self.slow_start.on_rtt_update(
264 self.congestion_window,
265 time_sent,
266 self.time_of_last_sent_packet
267 .expect("At least one packet must be sent to update RTT"),
268 rtt_estimator.latest_rtt(),
269 );
270
271 if self.state.is_slow_start() && self.congestion_window >= self.slow_start.threshold {
272 publisher.on_slow_start_exited(SlowStartExitCause::Rtt, self.congestion_window());
273 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
274 //# In the case when CUBIC runs the hybrid slow start [HR08], it may exit
275 //# the first slow start without incurring any packet loss and thus W_max
276 //# is undefined. In this special case, CUBIC switches to congestion
277 //# avoidance and increases its congestion window size using Eq. 1, where
278 //# t is the elapsed time since the beginning of the current congestion
279 //# avoidance, K is set to 0, and W_max is set to the congestion window
280 //# size at the beginning of the current congestion avoidance.
281 self.state = State::congestion_avoidance(now);
282 self.cubic.on_slow_start_exit(self.congestion_window);
283 }
284 }
285
286 #[inline]
287 fn on_ack<Pub: Publisher>(
288 &mut self,
289 newest_acked_time_sent: Timestamp,
290 bytes_acknowledged: usize,
291 _newest_acked_packet_info: Self::PacketInfo,
292 rtt_estimator: &RttEstimator,
293 _random_generator: &mut dyn random::Generator,
294 ack_receive_time: Timestamp,
295 publisher: &mut Pub,
296 ) {
297 self.bytes_in_flight_hi = self.bytes_in_flight_hi.max(self.bytes_in_flight);
298 self.bytes_in_flight
299 .try_sub(bytes_acknowledged)
300 .expect("bytes_acknowledged should not exceed u32::MAX");
301
302 if self.under_utilized {
303 self.state.on_app_limited(ack_receive_time);
304
305 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.8
306 //# When bytes in flight is smaller than the congestion window and
307 //# sending is not pacing limited, the congestion window is
308 //# underutilized. This can happen due to insufficient application data
309 //# or flow control limits. When this occurs, the congestion window
310 //# SHOULD NOT be increased in either slow start or congestion avoidance.
311 return;
312 }
313
314 // Check if this ack causes the controller to exit recovery
315 if let State::Recovery(recovery_start_time, _) = self.state {
316 if newest_acked_time_sent > recovery_start_time {
317 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
318 //# A recovery period ends and the sender enters congestion avoidance
319 //# when a packet sent during the recovery period is acknowledged.
320 self.state = State::congestion_avoidance(ack_receive_time)
321 }
322 };
323
324 // The congestion window may continue to grow while app-limited due to pacing
325 // interrupting sending while momentarily not app-limited. To avoid the congestion
326 // window growing too far beyond bytes in flight, we limit the maximum cwnd to
327 // the highest bytes_in_flight value seen since the last congestion event multiplied
328 // by a multiplier depending on the current state.
329 const SLOW_START_MAX_CWND_MULTIPLIER: f32 = 2.0;
330 const MAX_CWND_MULTIPLIER: f32 = 1.5;
331 let max_cwnd = match self.state {
332 SlowStart => *self.bytes_in_flight_hi as f32 * SLOW_START_MAX_CWND_MULTIPLIER,
333 Recovery(_, _) => self.congestion_window,
334 CongestionAvoidance(_) => *self.bytes_in_flight_hi as f32 * MAX_CWND_MULTIPLIER,
335 }
336 .max(self.cubic.minimum_window());
337
338 if self.congestion_window >= max_cwnd {
339 // The window is already larger than the max, so we can return early
340 return;
341 }
342
343 match self.state {
344 SlowStart => {
345 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.1
346 //# While a sender is in slow start, the congestion window increases by
347 //# the number of bytes acknowledged when each acknowledgment is
348 //# processed. This results in exponential growth of the congestion
349 //# window.
350 self.congestion_window = (self.congestion_window
351 + self.slow_start.cwnd_increment(bytes_acknowledged))
352 .min(max_cwnd);
353
354 if self.congestion_window >= self.slow_start.threshold {
355 // The congestion window has exceeded a previously determined slow start threshold
356 // so transition to congestion avoidance and notify cubic of the slow start exit
357 publisher
358 .on_slow_start_exited(SlowStartExitCause::Other, self.congestion_window());
359 self.state = State::congestion_avoidance(ack_receive_time);
360 self.cubic.on_slow_start_exit(self.congestion_window);
361 }
362 }
363 Recovery(_, _) => {
364 // Don't increase the congestion window while in recovery
365 }
366 CongestionAvoidance(ref mut timing) => {
367 timing.on_window_increase(ack_receive_time);
368
369 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
370 //# t is the elapsed time from the beginning of the current congestion avoidance
371 let t = timing.t(ack_receive_time);
372
373 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
374 //# RTT is the weighted average RTT
375 // TODO: Linux Kernel Cubic implementation uses min RTT, possibly
376 // because it is more stable than smoothed_rtt. Other implementations
377 // have followed Linux's choice, so we will as well. The end result is a more
378 // conservative rate of increase of the congestion window. This requires
379 // investigation and testing to evaluate if smoothed_rtt would be a better input.
380 let rtt = rtt_estimator.min_rtt();
381
382 self.congestion_avoidance(t, rtt, bytes_acknowledged, max_cwnd);
383 }
384 };
385
386 debug_assert!(self.congestion_window >= self.cubic.minimum_window());
387 }
388
389 #[inline]
390 fn on_packet_lost<Pub: Publisher>(
391 &mut self,
392 lost_bytes: u32,
393 _packet_info: Self::PacketInfo,
394 persistent_congestion: bool,
395 _new_loss_burst: bool,
396 _random_generator: &mut dyn random::Generator,
397 timestamp: Timestamp,
398 publisher: &mut Pub,
399 ) {
400 debug_assert!(lost_bytes > 0);
401
402 self.bytes_in_flight -= lost_bytes;
403
404 if self.state.is_slow_start() && !persistent_congestion {
405 publisher
406 .on_slow_start_exited(SlowStartExitCause::PacketLoss, self.congestion_window());
407 }
408
409 self.on_congestion_event(timestamp);
410
411 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.6.2
412 //# When persistent congestion is declared, the sender's congestion
413 //# window MUST be reduced to the minimum congestion window
414 //# (kMinimumWindow), similar to a TCP sender's response on an RTO
415 //# [RFC5681].
416 if persistent_congestion {
417 self.congestion_window = self.cubic.minimum_window();
418 self.state = State::SlowStart;
419 self.cubic.reset();
420 }
421 }
422
423 #[inline]
424 fn on_explicit_congestion<Pub: Publisher>(
425 &mut self,
426 _ce_count: u64,
427 event_time: Timestamp,
428 publisher: &mut Pub,
429 ) {
430 if self.state.is_slow_start() {
431 publisher.on_slow_start_exited(SlowStartExitCause::Ecn, self.congestion_window());
432 }
433
434 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.1
435 //# If a path has been validated to support Explicit Congestion
436 //# Notification (ECN) [RFC3168] [RFC8311], QUIC treats a Congestion
437 //# Experienced (CE) codepoint in the IP header as a signal of
438 //# congestion.
439 self.on_congestion_event(event_time);
440 }
441
442 //= https://www.rfc-editor.org/rfc/rfc8899#section-3
443 //= type=exception
444 //= reason=See https://github.com/aws/s2n-quic/issues/959
445 //# An update to the PLPMTU (or MPS) MUST NOT increase the congestion
446 //# window measured in bytes [RFC4821].
447
448 //= https://www.rfc-editor.org/rfc/rfc8899#section-3
449 //# A PL that maintains the congestion window in terms of a limit to
450 //# the number of outstanding fixed-size packets SHOULD adapt this
451 //# limit to compensate for the size of the actual packets.
452
453 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
454 //# If the maximum datagram size is decreased in order to complete the
455 //# handshake, the congestion window SHOULD be set to the new initial
456 //# congestion window.
457 #[inline]
458 fn on_mtu_update<Pub: Publisher>(&mut self, max_datagram_size: u16, _publisher: &mut Pub) {
459 let old_max_datagram_size = self.max_datagram_size;
460 self.max_datagram_size = max_datagram_size;
461 self.cubic.max_datagram_size = max_datagram_size;
462
463 let congestion_window =
464 (self.congestion_window / old_max_datagram_size as f32) * max_datagram_size as f32;
465 let initial_window =
466 Self::initial_window(&self.cubic, max_datagram_size, &Default::default());
467
468 self.congestion_window = max(congestion_window as u32, initial_window) as f32;
469 }
470
471 //= https://www.rfc-editor.org/rfc/rfc9002#section-6.4
472 //# When Initial and Handshake packet protection keys are discarded (see
473 //# Section 4.9 of [QUIC-TLS]), all packets that were sent with those
474 //# keys can no longer be acknowledged because their acknowledgments
475 //# cannot be processed. The sender MUST discard all recovery state
476 //# associated with those packets and MUST remove them from the count of
477 //# bytes in flight.
478 #[inline]
479 fn on_packet_discarded<Pub: Publisher>(&mut self, bytes_sent: usize, _publisher: &mut Pub) {
480 self.bytes_in_flight
481 .try_sub(bytes_sent)
482 .expect("bytes sent should not exceed u32::MAX");
483
484 if let Recovery(recovery_start_time, RequiresTransmission) = self.state {
485 // If any of the discarded packets were lost, they will no longer be retransmitted
486 // so flip the Recovery status back to Idle so it is not waiting for a
487 // retransmission that may never come.
488 self.state = Recovery(recovery_start_time, Idle);
489 }
490 }
491
492 #[inline]
493 fn earliest_departure_time(&self) -> Option<Timestamp> {
494 self.pacer.earliest_departure_time()
495 }
496}
497
498impl CubicCongestionController {
499 // max_datagram_size is the current max_datagram_size, and is
500 // expected to be 1200 when the congestion controller is created.
501 pub fn new(max_datagram_size: u16, app_settings: ApplicationSettings) -> Self {
502 let cubic = Cubic::new(max_datagram_size);
503 let congestion_window =
504 CubicCongestionController::initial_window(&cubic, max_datagram_size, &app_settings)
505 as f32;
506
507 Self {
508 cubic,
509 slow_start: HybridSlowStart::new(max_datagram_size),
510 pacer: Pacer::default(),
511 max_datagram_size,
512 congestion_window,
513 state: SlowStart,
514 bytes_in_flight: Counter::new(0),
515 time_of_last_sent_packet: None,
516 under_utilized: true,
517 bytes_in_flight_hi: Counter::new(0),
518 }
519 }
520
521 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
522 //# Endpoints SHOULD use an initial congestion
523 //# window of ten times the maximum datagram size (max_datagram_size),
524 //# while limiting the window to the larger of 14,720 bytes or twice the
525 //# maximum datagram size.
526
527 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
528 //# If the maximum datagram size changes during the connection, the
529 //# initial congestion window SHOULD be recalculated with the new size.
530 #[inline]
531 fn initial_window(
532 cubic: &Cubic,
533 max_datagram_size: u16,
534 app_settings: &ApplicationSettings,
535 ) -> u32 {
536 const INITIAL_WINDOW_LIMIT: u32 = 14720;
537 let default = min(
538 10 * max_datagram_size as u32,
539 max(INITIAL_WINDOW_LIMIT, 2 * max_datagram_size as u32),
540 );
541 let initial_window = app_settings.initial_congestion_window.unwrap_or(default);
542
543 max(initial_window, cubic.minimum_window() as u32)
544 }
545
546 #[inline]
547 fn congestion_avoidance(
548 &mut self,
549 t: Duration,
550 rtt: Duration,
551 sent_bytes: usize,
552 max_cwnd: f32,
553 ) {
554 let w_cubic = self.cubic.w_cubic(t);
555 let w_est = self.cubic.w_est(t, rtt);
556 // limit the window increase to half the acked bytes
557 // as the Linux implementation of Cubic does.
558 let max_cwnd = (self.congestion_window + sent_bytes as f32 / 2.0).min(max_cwnd);
559
560 if w_cubic < w_est {
561 // TCP-Friendly Region
562 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.2
563 //# When receiving an ACK in congestion avoidance (cwnd could be greater than
564 //# or less than W_max), CUBIC checks whether W_cubic(t) is less than
565 //# W_est(t). If so, CUBIC is in the TCP-friendly region and cwnd SHOULD
566 //# be set to W_est(t) at each reception of an ACK.
567 self.congestion_window = self.packets_to_bytes(w_est).min(max_cwnd);
568 } else {
569 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
570 //# Upon receiving an ACK during congestion avoidance, CUBIC computes the
571 //# window increase rate during the next RTT period using Eq. 1. It sets
572 //# W_cubic(t+RTT) as the candidate target value of the congestion
573 //# window
574
575 // Concave Region
576 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.3
577 //# When receiving an ACK in congestion avoidance, if CUBIC is not in the
578 //# TCP-friendly region and cwnd is less than W_max, then CUBIC is in the
579 //# concave region. In this region, cwnd MUST be incremented by
580 //# (W_cubic(t+RTT) - cwnd)/cwnd for each received ACK, where
581 //# W_cubic(t+RTT) is calculated using Eq. 1.
582
583 // Convex Region
584 //# https://www.rfc-editor.org/rfc/rfc8312#section-4.4
585 //# When receiving an ACK in congestion avoidance, if CUBIC is not in the
586 //# TCP-friendly region and cwnd is larger than or equal to W_max, then
587 //# CUBIC is in the convex region.
588
589 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.4
590 //# In this region, cwnd MUST be incremented by
591 //# (W_cubic(t+RTT) - cwnd)/cwnd for each received ACK, where
592 //# W_cubic(t+RTT) is calculated using Eq. 1.
593
594 // The congestion window is adjusted in the same way in the convex and concave regions.
595 // A target congestion window is calculated for where the congestion window should be
596 // by the end of one RTT. That target is used for calculating the required rate of increase
597 // based on where the congestion window currently is. Assuming a full congestion window's
598 // worth of packets will be sent and acknowledged within that RTT, we increase the
599 // congestion window by this increment for each acknowledgement. As long as all packets
600 // are sent and acknowledged by the end of the RTT, the congestion window will reach the
601 // target size. Otherwise it will be smaller, reflecting that the network latency is
602 // higher than needed to achieve the target window, and thus a smaller congestion window
603 // is appropriate.
604 let target_congestion_window = self.packets_to_bytes(self.cubic.w_cubic(t + rtt));
605
606 // Decreases in the RTT estimate can cause the congestion window to get ahead of the
607 // target. In the case where the congestion window has already exceeded the target,
608 // we return without any further adjustment to the window.
609 if self.congestion_window >= target_congestion_window {
610 return;
611 }
612
613 let window_increase_rate =
614 (target_congestion_window - self.congestion_window) / self.congestion_window;
615 let window_increment = self.packets_to_bytes(window_increase_rate);
616
617 self.congestion_window = (self.congestion_window + window_increment).min(max_cwnd);
618 }
619 }
620
621 #[inline]
622 fn on_congestion_event(&mut self, event_time: Timestamp) {
623 // Reset bytes_in_flight_hi
624 self.bytes_in_flight_hi = BytesInFlight::new(0);
625
626 // No reaction if already in a recovery period.
627 if matches!(self.state, Recovery(_, _)) {
628 return;
629 }
630
631 // Enter recovery period.
632
633 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.1
634 //# The sender MUST exit slow start and enter a recovery period when a
635 //# packet is lost or when the ECN-CE count reported by its peer
636 //# increases.
637
638 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
639 //# If the congestion window is reduced immediately, a
640 //# single packet can be sent prior to reduction. This speeds up loss
641 //# recovery if the data in the lost packet is retransmitted and is
642 //# similar to TCP as described in Section 5 of [RFC6675].
643 self.state = Recovery(event_time, RequiresTransmission);
644
645 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
646 //# Implementations MAY reduce the congestion window immediately upon
647 //# entering a recovery period or use other mechanisms, such as
648 //# Proportional Rate Reduction [PRR], to reduce the congestion window
649 //# more gradually.
650
651 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
652 //# The minimum congestion window is the smallest value the congestion
653 //# window can attain in response to loss, an increase in the peer-
654 //# reported ECN-CE count, or persistent congestion.
655 self.congestion_window = self.cubic.multiplicative_decrease(self.congestion_window);
656
657 // Update Hybrid Slow Start with the decreased congestion window.
658 self.slow_start.on_congestion_event(self.congestion_window);
659 }
660
661 #[inline]
662 fn packets_to_bytes(&self, cwnd: f32) -> f32 {
663 cwnd * self.max_datagram_size as f32
664 }
665
666 /// Returns true if the congestion window is under utilized and should not grow larger
667 /// without further evidence of the stability of the current window.
668 #[inline]
669 fn is_congestion_window_under_utilized(&self) -> bool {
670 // This value is based on kMaxBurstBytes from Chromium
671 // https://source.chromium.org/chromium/chromium/src/+/master:net/third_party/quiche/src/quic/core/congestion_control/tcp_cubic_sender_bytes.cc;l=23;drc=f803516d2656ed829e54b2e819731763ca6cf4d9
672 const MAX_BURST_MULTIPLIER: u32 = 3;
673
674 if self.is_congestion_limited() {
675 return false;
676 }
677
678 // In slow start, allow the congestion window to increase as long as half of it is
679 // being used. This allows for the window to increase rapidly.
680 if self.state.is_slow_start() && self.bytes_in_flight >= self.congestion_window() / 2 {
681 return false;
682 }
683
684 // Otherwise allow the window to increase while MAX_BURST_MULTIPLIER packets are available
685 // in the window.
686 let available_congestion_window = self
687 .congestion_window()
688 .saturating_sub(*self.bytes_in_flight);
689 available_congestion_window > self.max_datagram_size as u32 * MAX_BURST_MULTIPLIER
690 }
691}
692
693#[derive(Default, Debug, Clone, Copy)]
694pub struct ApplicationSettings {
695 initial_congestion_window: Option<u32>,
696}
697
698/// Core functions of "CUBIC for Fast Long-Distance Networks" as specified in
699/// https://tools.ietf.org/html/rfc8312. The unit of all window sizes is in
700/// packets of size max_datagram_size to maintain alignment with the specification.
701/// Thus, window sizes should be converted to bytes before applying to the
702/// congestion window in the congestion controller.
703#[derive(Clone, Debug)]
704struct Cubic {
705 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
706 //# W_max is the window size just before the window is
707 //# reduced in the last congestion event.
708 w_max: f32,
709 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
710 //# a flow remembers the last value of W_max before it
711 //# updates W_max for the current congestion event.
712 //# Let us call the last value of W_max to be W_last_max.
713 w_last_max: f32,
714 // k is the time until we expect to reach w_max
715 k: Duration,
716 max_datagram_size: u16,
717}
718
719//= https://www.rfc-editor.org/rfc/rfc8312#section-5.1
720//# Based on these observations and our experiments, we find C=0.4
721//# gives a good balance between TCP-friendliness and aggressiveness
722//# of window increase. Therefore, C SHOULD be set to 0.4.
723const C: f32 = 0.4;
724
725//= https://www.rfc-editor.org/rfc/rfc8312#section-4.5
726//# Parameter beta_cubic SHOULD be set to 0.7.
727const BETA_CUBIC: f32 = 0.7;
728
729impl Cubic {
730 pub fn new(max_datagram_size: u16) -> Self {
731 Cubic {
732 w_max: 0.0,
733 w_last_max: 0.0,
734 k: Duration::ZERO,
735 max_datagram_size,
736 }
737 }
738
739 /// Reset to the original state
740 #[inline]
741 pub fn reset(&mut self) {
742 self.w_max = 0.0;
743 self.w_last_max = 0.0;
744 self.k = Duration::ZERO;
745 }
746
747 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
748 //# CUBIC uses the following window increase function:
749 //#
750 //# W_cubic(t) = C*(t-K)^3 + W_max (Eq. 1)
751 //#
752 //# where C is a constant fixed to determine the aggressiveness of window
753 //# increase in high BDP networks, t is the elapsed time from the
754 //# beginning of the current congestion avoidance, and K is the time
755 //# period that the above function takes to increase the current window
756 //# size to W_max if there are no further congestion events and is
757 //# calculated using the following equation:
758 //#
759 //# K = cubic_root(W_max*(1-beta_cubic)/C) (Eq. 2)
760 //#
761 //# where beta_cubic is the CUBIC multiplication decrease factor
762 #[inline]
763 fn w_cubic(&self, t: Duration) -> f32 {
764 C * (t.as_secs_f32() - self.k.as_secs_f32()).powi(3) + self.w_max
765 }
766
767 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.2
768 //# W_est(t) = W_max*beta_cubic +
769 // [3*(1-beta_cubic)/(1+beta_cubic)] * (t/RTT) (Eq. 4)
770 #[inline]
771 fn w_est(&self, t: Duration, rtt: Duration) -> f32 {
772 self.w_max.mul_add(
773 BETA_CUBIC,
774 (3.0 * (1.0 - BETA_CUBIC) / (1.0 + BETA_CUBIC)) * (t.as_secs_f32() / rtt.as_secs_f32()),
775 )
776 }
777
778 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.5
779 //# When a packet loss is detected by duplicate ACKs or a network
780 //# congestion is detected by ECN-Echo ACKs, CUBIC updates its W_max,
781 //# cwnd, and ssthresh as follows. Parameter beta_cubic SHOULD be set to
782 //# 0.7.
783 //#
784 //# W_max = cwnd; // save window size before reduction
785 //# ssthresh = cwnd * beta_cubic; // new slow-start threshold
786 //# ssthresh = max(ssthresh, 2); // threshold is at least 2 MSS
787 //# cwnd = cwnd * beta_cubic; // window reduction
788 // This does not change the units of the congestion window
789 #[inline]
790 fn multiplicative_decrease(&mut self, cwnd: f32) -> f32 {
791 self.w_max = self.bytes_to_packets(cwnd);
792
793 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
794 //# To speed up this bandwidth release by
795 //# existing flows, the following mechanism called "fast convergence"
796 //# SHOULD be implemented.
797
798 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
799 //# With fast convergence, when a congestion event occurs, before the
800 //# window reduction of the congestion window, a flow remembers the last
801 //# value of W_max before it updates W_max for the current congestion
802 //# event. Let us call the last value of W_max to be W_last_max.
803 //#
804 //# if (W_max < W_last_max){ // should we make room for others
805 //# W_last_max = W_max; // remember the last W_max
806 //# W_max = W_max*(1.0+beta_cubic)/2.0; // further reduce W_max
807 //# } else {
808 //# W_last_max = W_max // remember the last W_max
809 //# }
810 //#
811 //# At a congestion event, if the current value of W_max is less than
812 //# W_last_max, this indicates that the saturation point experienced by
813 //# this flow is getting reduced because of the change in available
814 //# bandwidth. Then we allow this flow to release more bandwidth by
815 //# reducing W_max further. This action effectively lengthens the time
816 //# for this flow to increase its congestion window because the reduced
817 //# W_max forces the flow to have the plateau earlier. This allows more
818 //# time for the new flow to catch up to its congestion window size.
819 let w_max = self.w_max;
820 if w_max < self.w_last_max {
821 self.w_max = (w_max * (1.0 + BETA_CUBIC) / 2.0)
822 .max(self.bytes_to_packets(self.minimum_window()));
823 }
824 self.w_last_max = w_max;
825
826 let cwnd_start = (cwnd * BETA_CUBIC).max(self.minimum_window());
827
828 //= https://tools.ietf.org/id/draft-eggert-tcpm-rfc8312bis-01#4.2
829 //# _K_ is the time period that the above
830 //# function takes to increase the congestion window size at the
831 //# beginning of the current congestion avoidance stage to _W_(max)_ if
832 //# there are no further congestion events and is calculated using the
833 //# following equation:
834 //#
835 //# ________________
836 //# /W - cwnd
837 //# 3 / max start
838 //# K = | / ----------------
839 //# |/ C
840 //#
841 //# Figure 2
842 //#
843 //# where _cwnd_(start)_ is the congestion window at the beginning of the
844 //# current congestion avoidance stage.
845 self.k =
846 Duration::from_secs_f32(((self.w_max - self.bytes_to_packets(cwnd_start)) / C).cbrt());
847
848 cwnd_start
849 }
850
851 //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
852 //# In the case when CUBIC runs the hybrid slow start [HR08], it may exit
853 //# the first slow start without incurring any packet loss and thus W_max
854 //# is undefined. In this special case, CUBIC switches to congestion
855 //# avoidance and increases its congestion window size using Eq. 1, where
856 //# t is the elapsed time since the beginning of the current congestion
857 //# avoidance, K is set to 0, and W_max is set to the congestion window
858 //# size at the beginning of the current congestion avoidance.
859 #[inline]
860 fn on_slow_start_exit(&mut self, cwnd: f32) {
861 self.w_max = self.bytes_to_packets(cwnd);
862
863 // We are currently at the w_max, so set k to zero indicating zero
864 // seconds to reach the max
865 self.k = Duration::from_secs(0);
866 }
867
868 //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
869 //# The minimum congestion window is the smallest value the congestion
870 //# window can attain in response to loss, an increase in the peer-
871 //# reported ECN-CE count, or persistent congestion. The RECOMMENDED
872 //# value is 2 * max_datagram_size.
873 #[inline]
874 fn minimum_window(&self) -> f32 {
875 2.0 * self.max_datagram_size as f32
876 }
877
878 #[inline]
879 fn bytes_to_packets(&self, bytes: f32) -> f32 {
880 bytes / self.max_datagram_size as f32
881 }
882}
883
884#[non_exhaustive]
885#[derive(Debug, Default)]
886pub struct Endpoint {
887 app_settings: ApplicationSettings,
888}
889
890impl congestion_controller::Endpoint for Endpoint {
891 type CongestionController = CubicCongestionController;
892
893 fn new_congestion_controller(
894 &mut self,
895 path_info: congestion_controller::PathInfo,
896 ) -> Self::CongestionController {
897 CubicCongestionController::new(path_info.max_datagram_size, self.app_settings)
898 }
899}
900
901pub mod builder {
902 use super::{ApplicationSettings, Endpoint};
903
904 /// Build the congestion controller endpoint with application provided overrides
905 #[derive(Default)]
906 pub struct Builder {
907 initial_congestion_window: Option<u32>,
908 }
909
910 impl Builder {
911 /// Set the initial congestion window in bytes.
912 pub fn with_initial_congestion_window(mut self, initial_congestion_window: u32) -> Self {
913 self.initial_congestion_window = Some(initial_congestion_window);
914 self
915 }
916
917 pub fn build(self) -> Endpoint {
918 let app_settings = ApplicationSettings {
919 initial_congestion_window: self.initial_congestion_window,
920 };
921 Endpoint { app_settings }
922 }
923 }
924}
925
926#[cfg(test)]
927mod tests;