s2n_quic_core/recovery/
cubic.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5    counter::Counter,
6    event::builder::SlowStartExitCause,
7    random,
8    recovery::{
9        congestion_controller::{self, CongestionController, Publisher},
10        cubic::{FastRetransmission::*, State::*},
11        hybrid_slow_start::HybridSlowStart,
12        pacing::Pacer,
13        RttEstimator,
14    },
15    time::Timestamp,
16};
17use core::{
18    cmp::{max, min},
19    time::Duration,
20};
21#[cfg(not(feature = "std"))]
22use num_traits::Float as _;
23
24//= https://www.rfc-editor.org/rfc/rfc9002#section-7.3
25//#                 New Path or      +------------+
26//#            persistent congestion |   Slow     |
27//#        (O)---------------------->|   Start    |
28//#                                  +------------+
29//#                                        |
30//#                                Loss or |
31//#                        ECN-CE increase |
32//#                                        v
33//# +------------+     Loss or       +------------+
34//# | Congestion |  ECN-CE increase  |  Recovery  |
35//# | Avoidance  |------------------>|   Period   |
36//# +------------+                   +------------+
37//#           ^                            |
38//#           |                            |
39//#          +----------------------------+
40//#              Acknowledgment of packet
41//#                sent during recovery
42// This implementation uses Hybrid Slow Start, which allows for
43// Slow Start to exit directly to Congestion Avoidance.
44#[derive(Clone, Debug, PartialEq, Eq)]
45enum State {
46    SlowStart,
47    Recovery(Timestamp, FastRetransmission),
48    CongestionAvoidance(CongestionAvoidanceTiming),
49}
50
51impl State {
52    /// Returns State::CongestionAvoidance initialized with the given `start_time`
53    fn congestion_avoidance(start_time: Timestamp) -> Self {
54        Self::CongestionAvoidance(CongestionAvoidanceTiming {
55            start_time,
56            window_increase_time: start_time,
57            app_limited_time: None,
58        })
59    }
60
61    /// Called when app limited after sending has completed for a round and an ACK has been received.
62    fn on_app_limited(&mut self, timestamp: Timestamp) {
63        if let CongestionAvoidance(ref mut timing) = self {
64            debug_assert!(
65                timing
66                    .app_limited_time
67                    .is_none_or(|app_limited_time| timestamp >= app_limited_time),
68                "timestamp must be monotonically increasing"
69            );
70            debug_assert!(
71                timestamp >= timing.window_increase_time,
72                "timestamp must not be before the window was last increased"
73            );
74
75            timing.app_limited_time = Some(timestamp);
76        }
77    }
78
79    /// Returns true if the state is `SlowStart`
80    fn is_slow_start(&self) -> bool {
81        matches!(self, SlowStart)
82    }
83}
84
85//= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
86//# If the congestion window is reduced immediately, a
87//# single packet can be sent prior to reduction.  This speeds up loss
88//# recovery if the data in the lost packet is retransmitted and is
89//# similar to TCP as described in Section 5 of [RFC6675].
90#[derive(Clone, Debug, PartialEq, Eq)]
91enum FastRetransmission {
92    Idle,
93    RequiresTransmission,
94}
95
96#[derive(Clone, Debug, PartialEq, Eq)]
97struct CongestionAvoidanceTiming {
98    // The time the congestion avoidance state was entered
99    start_time: Timestamp,
100    // The time the congestion window was last increased
101    window_increase_time: Timestamp,
102    // The last time the current congestion window was underutilized
103    app_limited_time: Option<Timestamp>,
104}
105
106impl CongestionAvoidanceTiming {
107    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
108    //# t is the elapsed time from the beginning of the current congestion avoidance
109    fn t(&self, timestamp: Timestamp) -> Duration {
110        timestamp - self.start_time
111    }
112
113    /// Called when the congestion window is being increased.
114    ///
115    /// Adjusts the start time by the period from the last window increase to the app limited time
116    /// to avoid counting the app limited time period in W_cubic.
117    fn on_window_increase(&mut self, timestamp: Timestamp) {
118        if let Some(app_limited_time) = self.app_limited_time.take() {
119            //= https://www.rfc-editor.org/rfc/rfc8312#section-5.8
120            //# CUBIC does not raise its congestion window size if the flow is
121            //# currently limited by the application instead of the congestion
122            //# window.  In case of long periods when cwnd has not been updated due
123            //# to the application rate limit, such as idle periods, t in Eq. 1 MUST
124            //# NOT include these periods; otherwise, W_cubic(t) might be very high
125            //# after restarting from these periods.
126
127            // Adjust the congestion avoidance start time by the app limited duration
128            self.start_time += app_limited_time - self.window_increase_time;
129        }
130
131        self.window_increase_time = timestamp;
132    }
133}
134
135/// A congestion controller that implements "CUBIC for Fast Long-Distance Networks"
136/// as specified in <https://tools.ietf.org/html/rfc8312>. The Hybrid Slow Start algorithm
137/// is used for determining the slow start threshold.
138#[derive(Clone, Debug)]
139pub struct CubicCongestionController {
140    cubic: Cubic,
141    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
142    //# CUBIC MUST employ a slow-start algorithm, when the cwnd is no more
143    //# than ssthresh.
144
145    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
146    //# Among the slow-start algorithms, CUBIC MAY choose the
147    //# standard TCP slow start [RFC5681] in general networks, or the limited
148    //# slow start [RFC3742] or hybrid slow start [HR08] for fast and long-
149    //# distance networks.
150    slow_start: HybridSlowStart,
151    pacer: Pacer,
152    max_datagram_size: u16,
153    congestion_window: f32,
154    state: State,
155    //= https://www.rfc-editor.org/rfc/rfc9002#appendix-B.2
156    //# The sum of the size in bytes of all sent packets
157    //# that contain at least one ack-eliciting or PADDING frame and have
158    //# not been acknowledged or declared lost.  The size does not include
159    //# IP or UDP overhead, but does include the QUIC header and
160    //# Authenticated Encryption with Associated Data (AEAD) overhead.
161    //# Packets only containing ACK frames do not count toward
162    //# bytes_in_flight to ensure congestion control does not impede
163    //# congestion feedback.
164
165    // ACK + PADDING packets do not contribute to bytes_in_flight
166    // See https://github.com/aws/s2n-quic/pull/1514
167    bytes_in_flight: BytesInFlight,
168    time_of_last_sent_packet: Option<Timestamp>,
169    under_utilized: bool,
170    // The highest number of bytes in flight seen when an ACK was received,
171    // since the last congestion event.
172    bytes_in_flight_hi: BytesInFlight,
173}
174
175type BytesInFlight = Counter<u32>;
176
177impl CongestionController for CubicCongestionController {
178    type PacketInfo = ();
179
180    #[inline]
181    fn congestion_window(&self) -> u32 {
182        self.congestion_window as u32
183    }
184
185    #[inline]
186    fn bytes_in_flight(&self) -> u32 {
187        *self.bytes_in_flight
188    }
189
190    #[inline]
191    fn is_congestion_limited(&self) -> bool {
192        let available_congestion_window = self
193            .congestion_window()
194            .saturating_sub(*self.bytes_in_flight);
195        available_congestion_window < self.max_datagram_size as u32
196    }
197
198    #[inline]
199    fn requires_fast_retransmission(&self) -> bool {
200        matches!(self.state, Recovery(_, RequiresTransmission))
201    }
202
203    #[inline]
204    fn on_packet_sent<Pub: Publisher>(
205        &mut self,
206        time_sent: Timestamp,
207        bytes_sent: usize,
208        app_limited: Option<bool>,
209        rtt_estimator: &RttEstimator,
210        publisher: &mut Pub,
211    ) {
212        if bytes_sent == 0 {
213            // Packet was not congestion controlled
214            return;
215        }
216
217        self.bytes_in_flight
218            .try_add(bytes_sent)
219            .expect("bytes sent should not exceed u32::MAX");
220
221        if let Some(app_limited) = app_limited {
222            // We check both the given `app_limited` value and is_congestion_window_under_utilized()
223            // as is_congestion_window_under_utilized() is more lenient with respect to the utilization
224            // of the congestion window than the app_limited check. is_congestion_window_under_utilized()
225            // returns true if there are more than 3 MTU's of space left in the cwnd, or less than
226            // half the cwnd is utilized in slow start.
227            self.under_utilized = app_limited && self.is_congestion_window_under_utilized();
228        } else {
229            // We don't externally determine `app_limited` in the Initial and Handshake packet
230            // spaces, so set under_utilized based on is_congestion_window_under_utilized alone
231            self.under_utilized = self.is_congestion_window_under_utilized();
232        }
233
234        if let Recovery(recovery_start_time, RequiresTransmission) = self.state {
235            // A packet has been sent since we entered recovery (fast retransmission)
236            // so flip the state back to idle.
237            self.state = Recovery(recovery_start_time, Idle);
238        }
239
240        self.time_of_last_sent_packet = Some(time_sent);
241
242        self.pacer.on_packet_sent(
243            time_sent,
244            bytes_sent,
245            rtt_estimator,
246            self.congestion_window(),
247            self.max_datagram_size,
248            self.state.is_slow_start(),
249            publisher,
250        );
251    }
252
253    #[inline]
254    fn on_rtt_update<Pub: Publisher>(
255        &mut self,
256        time_sent: Timestamp,
257        now: Timestamp,
258        rtt_estimator: &RttEstimator,
259        publisher: &mut Pub,
260    ) {
261        // Update the Slow Start algorithm each time the RTT
262        // estimate is updated to find the slow start threshold.
263        self.slow_start.on_rtt_update(
264            self.congestion_window,
265            time_sent,
266            self.time_of_last_sent_packet
267                .expect("At least one packet must be sent to update RTT"),
268            rtt_estimator.latest_rtt(),
269        );
270
271        if self.state.is_slow_start() && self.congestion_window >= self.slow_start.threshold {
272            publisher.on_slow_start_exited(SlowStartExitCause::Rtt, self.congestion_window());
273            //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
274            //# In the case when CUBIC runs the hybrid slow start [HR08], it may exit
275            //# the first slow start without incurring any packet loss and thus W_max
276            //# is undefined.  In this special case, CUBIC switches to congestion
277            //# avoidance and increases its congestion window size using Eq. 1, where
278            //# t is the elapsed time since the beginning of the current congestion
279            //# avoidance, K is set to 0, and W_max is set to the congestion window
280            //# size at the beginning of the current congestion avoidance.
281            self.state = State::congestion_avoidance(now);
282            self.cubic.on_slow_start_exit(self.congestion_window);
283        }
284    }
285
286    #[inline]
287    fn on_ack<Pub: Publisher>(
288        &mut self,
289        newest_acked_time_sent: Timestamp,
290        bytes_acknowledged: usize,
291        _newest_acked_packet_info: Self::PacketInfo,
292        rtt_estimator: &RttEstimator,
293        _random_generator: &mut dyn random::Generator,
294        ack_receive_time: Timestamp,
295        publisher: &mut Pub,
296    ) {
297        self.bytes_in_flight_hi = self.bytes_in_flight_hi.max(self.bytes_in_flight);
298        self.bytes_in_flight
299            .try_sub(bytes_acknowledged)
300            .expect("bytes_acknowledged should not exceed u32::MAX");
301
302        if self.under_utilized {
303            self.state.on_app_limited(ack_receive_time);
304
305            //= https://www.rfc-editor.org/rfc/rfc9002#section-7.8
306            //# When bytes in flight is smaller than the congestion window and
307            //# sending is not pacing limited, the congestion window is
308            //# underutilized.  This can happen due to insufficient application data
309            //# or flow control limits.  When this occurs, the congestion window
310            //# SHOULD NOT be increased in either slow start or congestion avoidance.
311            return;
312        }
313
314        // Check if this ack causes the controller to exit recovery
315        if let State::Recovery(recovery_start_time, _) = self.state {
316            if newest_acked_time_sent > recovery_start_time {
317                //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
318                //# A recovery period ends and the sender enters congestion avoidance
319                //# when a packet sent during the recovery period is acknowledged.
320                self.state = State::congestion_avoidance(ack_receive_time)
321            }
322        };
323
324        // The congestion window may continue to grow while app-limited due to pacing
325        // interrupting sending while momentarily not app-limited. To avoid the congestion
326        // window growing too far beyond bytes in flight, we limit the maximum cwnd to
327        // the highest bytes_in_flight value seen since the last congestion event multiplied
328        // by a multiplier depending on the current state.
329        const SLOW_START_MAX_CWND_MULTIPLIER: f32 = 2.0;
330        const MAX_CWND_MULTIPLIER: f32 = 1.5;
331        let max_cwnd = match self.state {
332            SlowStart => *self.bytes_in_flight_hi as f32 * SLOW_START_MAX_CWND_MULTIPLIER,
333            Recovery(_, _) => self.congestion_window,
334            CongestionAvoidance(_) => *self.bytes_in_flight_hi as f32 * MAX_CWND_MULTIPLIER,
335        }
336        .max(self.cubic.minimum_window());
337
338        if self.congestion_window >= max_cwnd {
339            // The window is already larger than the max, so we can return early
340            return;
341        }
342
343        match self.state {
344            SlowStart => {
345                //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.1
346                //# While a sender is in slow start, the congestion window increases by
347                //# the number of bytes acknowledged when each acknowledgment is
348                //# processed.  This results in exponential growth of the congestion
349                //# window.
350                self.congestion_window = (self.congestion_window
351                    + self.slow_start.cwnd_increment(bytes_acknowledged))
352                .min(max_cwnd);
353
354                if self.congestion_window >= self.slow_start.threshold {
355                    // The congestion window has exceeded a previously determined slow start threshold
356                    // so transition to congestion avoidance and notify cubic of the slow start exit
357                    publisher
358                        .on_slow_start_exited(SlowStartExitCause::Other, self.congestion_window());
359                    self.state = State::congestion_avoidance(ack_receive_time);
360                    self.cubic.on_slow_start_exit(self.congestion_window);
361                }
362            }
363            Recovery(_, _) => {
364                // Don't increase the congestion window while in recovery
365            }
366            CongestionAvoidance(ref mut timing) => {
367                timing.on_window_increase(ack_receive_time);
368
369                //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
370                //# t is the elapsed time from the beginning of the current congestion avoidance
371                let t = timing.t(ack_receive_time);
372
373                //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
374                //# RTT is the weighted average RTT
375                // TODO: Linux Kernel Cubic implementation uses min RTT, possibly
376                //      because it is more stable than smoothed_rtt. Other implementations
377                //      have followed Linux's choice, so we will as well. The end result is a more
378                //      conservative rate of increase of the congestion window. This requires
379                //      investigation and testing to evaluate if smoothed_rtt would be a better input.
380                let rtt = rtt_estimator.min_rtt();
381
382                self.congestion_avoidance(t, rtt, bytes_acknowledged, max_cwnd);
383            }
384        };
385
386        debug_assert!(self.congestion_window >= self.cubic.minimum_window());
387    }
388
389    #[inline]
390    fn on_packet_lost<Pub: Publisher>(
391        &mut self,
392        lost_bytes: u32,
393        _packet_info: Self::PacketInfo,
394        persistent_congestion: bool,
395        _new_loss_burst: bool,
396        _random_generator: &mut dyn random::Generator,
397        timestamp: Timestamp,
398        publisher: &mut Pub,
399    ) {
400        debug_assert!(lost_bytes > 0);
401
402        self.bytes_in_flight -= lost_bytes;
403
404        if self.state.is_slow_start() && !persistent_congestion {
405            publisher
406                .on_slow_start_exited(SlowStartExitCause::PacketLoss, self.congestion_window());
407        }
408
409        self.on_congestion_event(timestamp);
410
411        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.6.2
412        //# When persistent congestion is declared, the sender's congestion
413        //# window MUST be reduced to the minimum congestion window
414        //# (kMinimumWindow), similar to a TCP sender's response on an RTO
415        //# [RFC5681].
416        if persistent_congestion {
417            self.congestion_window = self.cubic.minimum_window();
418            self.state = State::SlowStart;
419            self.cubic.reset();
420        }
421    }
422
423    #[inline]
424    fn on_explicit_congestion<Pub: Publisher>(
425        &mut self,
426        _ce_count: u64,
427        event_time: Timestamp,
428        publisher: &mut Pub,
429    ) {
430        if self.state.is_slow_start() {
431            publisher.on_slow_start_exited(SlowStartExitCause::Ecn, self.congestion_window());
432        }
433
434        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.1
435        //# If a path has been validated to support Explicit Congestion
436        //# Notification (ECN) [RFC3168] [RFC8311], QUIC treats a Congestion
437        //# Experienced (CE) codepoint in the IP header as a signal of
438        //# congestion.
439        self.on_congestion_event(event_time);
440    }
441
442    //= https://www.rfc-editor.org/rfc/rfc8899#section-3
443    //= type=exception
444    //= reason=See https://github.com/aws/s2n-quic/issues/959
445    //# An update to the PLPMTU (or MPS) MUST NOT increase the congestion
446    //# window measured in bytes [RFC4821].
447
448    //= https://www.rfc-editor.org/rfc/rfc8899#section-3
449    //# A PL that maintains the congestion window in terms of a limit to
450    //# the number of outstanding fixed-size packets SHOULD adapt this
451    //# limit to compensate for the size of the actual packets.
452
453    //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
454    //# If the maximum datagram size is decreased in order to complete the
455    //# handshake, the congestion window SHOULD be set to the new initial
456    //# congestion window.
457    #[inline]
458    fn on_mtu_update<Pub: Publisher>(&mut self, max_datagram_size: u16, _publisher: &mut Pub) {
459        let old_max_datagram_size = self.max_datagram_size;
460        self.max_datagram_size = max_datagram_size;
461        self.cubic.max_datagram_size = max_datagram_size;
462
463        let congestion_window =
464            (self.congestion_window / old_max_datagram_size as f32) * max_datagram_size as f32;
465        let initial_window =
466            Self::initial_window(&self.cubic, max_datagram_size, &Default::default());
467
468        self.congestion_window = max(congestion_window as u32, initial_window) as f32;
469    }
470
471    //= https://www.rfc-editor.org/rfc/rfc9002#section-6.4
472    //# When Initial and Handshake packet protection keys are discarded (see
473    //# Section 4.9 of [QUIC-TLS]), all packets that were sent with those
474    //# keys can no longer be acknowledged because their acknowledgments
475    //# cannot be processed.  The sender MUST discard all recovery state
476    //# associated with those packets and MUST remove them from the count of
477    //# bytes in flight.
478    #[inline]
479    fn on_packet_discarded<Pub: Publisher>(&mut self, bytes_sent: usize, _publisher: &mut Pub) {
480        self.bytes_in_flight
481            .try_sub(bytes_sent)
482            .expect("bytes sent should not exceed u32::MAX");
483
484        if let Recovery(recovery_start_time, RequiresTransmission) = self.state {
485            // If any of the discarded packets were lost, they will no longer be retransmitted
486            // so flip the Recovery status back to Idle so it is not waiting for a
487            // retransmission that may never come.
488            self.state = Recovery(recovery_start_time, Idle);
489        }
490    }
491
492    #[inline]
493    fn earliest_departure_time(&self) -> Option<Timestamp> {
494        self.pacer.earliest_departure_time()
495    }
496}
497
498impl CubicCongestionController {
499    // max_datagram_size is the current max_datagram_size, and is
500    // expected to be 1200 when the congestion controller is created.
501    pub fn new(max_datagram_size: u16, app_settings: ApplicationSettings) -> Self {
502        let cubic = Cubic::new(max_datagram_size);
503        let congestion_window =
504            CubicCongestionController::initial_window(&cubic, max_datagram_size, &app_settings)
505                as f32;
506
507        Self {
508            cubic,
509            slow_start: HybridSlowStart::new(max_datagram_size),
510            pacer: Pacer::default(),
511            max_datagram_size,
512            congestion_window,
513            state: SlowStart,
514            bytes_in_flight: Counter::new(0),
515            time_of_last_sent_packet: None,
516            under_utilized: true,
517            bytes_in_flight_hi: Counter::new(0),
518        }
519    }
520
521    //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
522    //# Endpoints SHOULD use an initial congestion
523    //# window of ten times the maximum datagram size (max_datagram_size),
524    //# while limiting the window to the larger of 14,720 bytes or twice the
525    //# maximum datagram size.
526
527    //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
528    //# If the maximum datagram size changes during the connection, the
529    //# initial congestion window SHOULD be recalculated with the new size.
530    #[inline]
531    fn initial_window(
532        cubic: &Cubic,
533        max_datagram_size: u16,
534        app_settings: &ApplicationSettings,
535    ) -> u32 {
536        const INITIAL_WINDOW_LIMIT: u32 = 14720;
537        let default = min(
538            10 * max_datagram_size as u32,
539            max(INITIAL_WINDOW_LIMIT, 2 * max_datagram_size as u32),
540        );
541        let initial_window = app_settings.initial_congestion_window.unwrap_or(default);
542
543        max(initial_window, cubic.minimum_window() as u32)
544    }
545
546    #[inline]
547    fn congestion_avoidance(
548        &mut self,
549        t: Duration,
550        rtt: Duration,
551        sent_bytes: usize,
552        max_cwnd: f32,
553    ) {
554        let w_cubic = self.cubic.w_cubic(t);
555        let w_est = self.cubic.w_est(t, rtt);
556        // limit the window increase to half the acked bytes
557        // as the Linux implementation of Cubic does.
558        let max_cwnd = (self.congestion_window + sent_bytes as f32 / 2.0).min(max_cwnd);
559
560        if w_cubic < w_est {
561            // TCP-Friendly Region
562            //= https://www.rfc-editor.org/rfc/rfc8312#section-4.2
563            //# When receiving an ACK in congestion avoidance (cwnd could be greater than
564            //# or less than W_max), CUBIC checks whether W_cubic(t) is less than
565            //# W_est(t).  If so, CUBIC is in the TCP-friendly region and cwnd SHOULD
566            //# be set to W_est(t) at each reception of an ACK.
567            self.congestion_window = self.packets_to_bytes(w_est).min(max_cwnd);
568        } else {
569            //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
570            //# Upon receiving an ACK during congestion avoidance, CUBIC computes the
571            //# window increase rate during the next RTT period using Eq. 1.  It sets
572            //# W_cubic(t+RTT) as the candidate target value of the congestion
573            //# window
574
575            // Concave Region
576            //= https://www.rfc-editor.org/rfc/rfc8312#section-4.3
577            //# When receiving an ACK in congestion avoidance, if CUBIC is not in the
578            //# TCP-friendly region and cwnd is less than W_max, then CUBIC is in the
579            //# concave region.  In this region, cwnd MUST be incremented by
580            //# (W_cubic(t+RTT) - cwnd)/cwnd for each received ACK, where
581            //# W_cubic(t+RTT) is calculated using Eq. 1.
582
583            // Convex Region
584            //# https://www.rfc-editor.org/rfc/rfc8312#section-4.4
585            //# When receiving an ACK in congestion avoidance, if CUBIC is not in the
586            //# TCP-friendly region and cwnd is larger than or equal to W_max, then
587            //# CUBIC is in the convex region.
588
589            //= https://www.rfc-editor.org/rfc/rfc8312#section-4.4
590            //# In this region, cwnd MUST be incremented by
591            //# (W_cubic(t+RTT) - cwnd)/cwnd for each received ACK, where
592            //# W_cubic(t+RTT) is calculated using Eq. 1.
593
594            // The congestion window is adjusted in the same way in the convex and concave regions.
595            // A target congestion window is calculated for where the congestion window should be
596            // by the end of one RTT. That target is used for calculating the required rate of increase
597            // based on where the congestion window currently is. Assuming a full congestion window's
598            // worth of packets will be sent and acknowledged within that RTT, we increase the
599            // congestion window by this increment for each acknowledgement. As long as all packets
600            // are sent and acknowledged by the end of the RTT, the congestion window will reach the
601            // target size. Otherwise it will be smaller, reflecting that the network latency is
602            // higher than needed to achieve the target window, and thus a smaller congestion window
603            // is appropriate.
604            let target_congestion_window = self.packets_to_bytes(self.cubic.w_cubic(t + rtt));
605
606            // Decreases in the RTT estimate can cause the congestion window to get ahead of the
607            // target. In the case where the congestion window has already exceeded the target,
608            // we return without any further adjustment to the window.
609            if self.congestion_window >= target_congestion_window {
610                return;
611            }
612
613            let window_increase_rate =
614                (target_congestion_window - self.congestion_window) / self.congestion_window;
615            let window_increment = self.packets_to_bytes(window_increase_rate);
616
617            self.congestion_window = (self.congestion_window + window_increment).min(max_cwnd);
618        }
619    }
620
621    #[inline]
622    fn on_congestion_event(&mut self, event_time: Timestamp) {
623        // Reset bytes_in_flight_hi
624        self.bytes_in_flight_hi = BytesInFlight::new(0);
625
626        // No reaction if already in a recovery period.
627        if matches!(self.state, Recovery(_, _)) {
628            return;
629        }
630
631        // Enter recovery period.
632
633        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.1
634        //# The sender MUST exit slow start and enter a recovery period when a
635        //# packet is lost or when the ECN-CE count reported by its peer
636        //# increases.
637
638        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
639        //# If the congestion window is reduced immediately, a
640        //# single packet can be sent prior to reduction.  This speeds up loss
641        //# recovery if the data in the lost packet is retransmitted and is
642        //# similar to TCP as described in Section 5 of [RFC6675].
643        self.state = Recovery(event_time, RequiresTransmission);
644
645        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.3.2
646        //# Implementations MAY reduce the congestion window immediately upon
647        //# entering a recovery period or use other mechanisms, such as
648        //# Proportional Rate Reduction [PRR], to reduce the congestion window
649        //# more gradually.
650
651        //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
652        //# The minimum congestion window is the smallest value the congestion
653        //# window can attain in response to loss, an increase in the peer-
654        //# reported ECN-CE count, or persistent congestion.
655        self.congestion_window = self.cubic.multiplicative_decrease(self.congestion_window);
656
657        // Update Hybrid Slow Start with the decreased congestion window.
658        self.slow_start.on_congestion_event(self.congestion_window);
659    }
660
661    #[inline]
662    fn packets_to_bytes(&self, cwnd: f32) -> f32 {
663        cwnd * self.max_datagram_size as f32
664    }
665
666    /// Returns true if the congestion window is under utilized and should not grow larger
667    /// without further evidence of the stability of the current window.
668    #[inline]
669    fn is_congestion_window_under_utilized(&self) -> bool {
670        // This value is based on kMaxBurstBytes from Chromium
671        // https://source.chromium.org/chromium/chromium/src/+/master:net/third_party/quiche/src/quic/core/congestion_control/tcp_cubic_sender_bytes.cc;l=23;drc=f803516d2656ed829e54b2e819731763ca6cf4d9
672        const MAX_BURST_MULTIPLIER: u32 = 3;
673
674        if self.is_congestion_limited() {
675            return false;
676        }
677
678        // In slow start, allow the congestion window to increase as long as half of it is
679        // being used. This allows for the window to increase rapidly.
680        if self.state.is_slow_start() && self.bytes_in_flight >= self.congestion_window() / 2 {
681            return false;
682        }
683
684        // Otherwise allow the window to increase while MAX_BURST_MULTIPLIER packets are available
685        // in the window.
686        let available_congestion_window = self
687            .congestion_window()
688            .saturating_sub(*self.bytes_in_flight);
689        available_congestion_window > self.max_datagram_size as u32 * MAX_BURST_MULTIPLIER
690    }
691}
692
693#[derive(Default, Debug, Clone, Copy)]
694pub struct ApplicationSettings {
695    initial_congestion_window: Option<u32>,
696}
697
698/// Core functions of "CUBIC for Fast Long-Distance Networks" as specified in
699/// https://tools.ietf.org/html/rfc8312. The unit of all window sizes is in
700/// packets of size max_datagram_size to maintain alignment with the specification.
701/// Thus, window sizes should be converted to bytes before applying to the
702/// congestion window in the congestion controller.
703#[derive(Clone, Debug)]
704struct Cubic {
705    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
706    //# W_max is the window size just before the window is
707    //# reduced in the last congestion event.
708    w_max: f32,
709    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
710    //# a flow remembers the last value of W_max before it
711    //# updates W_max for the current congestion event.
712    //# Let us call the last value of W_max to be W_last_max.
713    w_last_max: f32,
714    // k is the time until we expect to reach w_max
715    k: Duration,
716    max_datagram_size: u16,
717}
718
719//= https://www.rfc-editor.org/rfc/rfc8312#section-5.1
720//# Based on these observations and our experiments, we find C=0.4
721//# gives a good balance between TCP-friendliness and aggressiveness
722//# of window increase.  Therefore, C SHOULD be set to 0.4.
723const C: f32 = 0.4;
724
725//= https://www.rfc-editor.org/rfc/rfc8312#section-4.5
726//# Parameter beta_cubic SHOULD be set to 0.7.
727const BETA_CUBIC: f32 = 0.7;
728
729impl Cubic {
730    pub fn new(max_datagram_size: u16) -> Self {
731        Cubic {
732            w_max: 0.0,
733            w_last_max: 0.0,
734            k: Duration::ZERO,
735            max_datagram_size,
736        }
737    }
738
739    /// Reset to the original state
740    #[inline]
741    pub fn reset(&mut self) {
742        self.w_max = 0.0;
743        self.w_last_max = 0.0;
744        self.k = Duration::ZERO;
745    }
746
747    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.1
748    //# CUBIC uses the following window increase function:
749    //#
750    //#    W_cubic(t) = C*(t-K)^3 + W_max (Eq. 1)
751    //#
752    //# where C is a constant fixed to determine the aggressiveness of window
753    //# increase in high BDP networks, t is the elapsed time from the
754    //# beginning of the current congestion avoidance, and K is the time
755    //# period that the above function takes to increase the current window
756    //# size to W_max if there are no further congestion events and is
757    //# calculated using the following equation:
758    //#
759    //#    K = cubic_root(W_max*(1-beta_cubic)/C) (Eq. 2)
760    //#
761    //# where beta_cubic is the CUBIC multiplication decrease factor
762    #[inline]
763    fn w_cubic(&self, t: Duration) -> f32 {
764        C * (t.as_secs_f32() - self.k.as_secs_f32()).powi(3) + self.w_max
765    }
766
767    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.2
768    //# W_est(t) = W_max*beta_cubic +
769    //               [3*(1-beta_cubic)/(1+beta_cubic)] * (t/RTT) (Eq. 4)
770    #[inline]
771    fn w_est(&self, t: Duration, rtt: Duration) -> f32 {
772        self.w_max.mul_add(
773            BETA_CUBIC,
774            (3.0 * (1.0 - BETA_CUBIC) / (1.0 + BETA_CUBIC)) * (t.as_secs_f32() / rtt.as_secs_f32()),
775        )
776    }
777
778    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.5
779    //# When a packet loss is detected by duplicate ACKs or a network
780    //# congestion is detected by ECN-Echo ACKs, CUBIC updates its W_max,
781    //# cwnd, and ssthresh as follows.  Parameter beta_cubic SHOULD be set to
782    //# 0.7.
783    //#
784    //#    W_max = cwnd;                 // save window size before reduction
785    //#    ssthresh = cwnd * beta_cubic; // new slow-start threshold
786    //#    ssthresh = max(ssthresh, 2);  // threshold is at least 2 MSS
787    //#    cwnd = cwnd * beta_cubic;     // window reduction
788    // This does not change the units of the congestion window
789    #[inline]
790    fn multiplicative_decrease(&mut self, cwnd: f32) -> f32 {
791        self.w_max = self.bytes_to_packets(cwnd);
792
793        //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
794        //# To speed up this bandwidth release by
795        //# existing flows, the following mechanism called "fast convergence"
796        //# SHOULD be implemented.
797
798        //= https://www.rfc-editor.org/rfc/rfc8312#section-4.6
799        //# With fast convergence, when a congestion event occurs, before the
800        //# window reduction of the congestion window, a flow remembers the last
801        //# value of W_max before it updates W_max for the current congestion
802        //# event.  Let us call the last value of W_max to be W_last_max.
803        //#
804        //#    if (W_max < W_last_max){ // should we make room for others
805        //#       W_last_max = W_max;             // remember the last W_max
806        //#       W_max = W_max*(1.0+beta_cubic)/2.0; // further reduce W_max
807        //#    } else {
808        //#       W_last_max = W_max              // remember the last W_max
809        //#    }
810        //#
811        //# At a congestion event, if the current value of W_max is less than
812        //# W_last_max, this indicates that the saturation point experienced by
813        //# this flow is getting reduced because of the change in available
814        //# bandwidth.  Then we allow this flow to release more bandwidth by
815        //# reducing W_max further.  This action effectively lengthens the time
816        //# for this flow to increase its congestion window because the reduced
817        //# W_max forces the flow to have the plateau earlier.  This allows more
818        //# time for the new flow to catch up to its congestion window size.
819        let w_max = self.w_max;
820        if w_max < self.w_last_max {
821            self.w_max = (w_max * (1.0 + BETA_CUBIC) / 2.0)
822                .max(self.bytes_to_packets(self.minimum_window()));
823        }
824        self.w_last_max = w_max;
825
826        let cwnd_start = (cwnd * BETA_CUBIC).max(self.minimum_window());
827
828        //= https://tools.ietf.org/id/draft-eggert-tcpm-rfc8312bis-01#4.2
829        //# _K_ is the time period that the above
830        //# function takes to increase the congestion window size at the
831        //# beginning of the current congestion avoidance stage to _W_(max)_ if
832        //# there are no further congestion events and is calculated using the
833        //# following equation:
834        //#
835        //#                                ________________
836        //#                               /W    - cwnd
837        //#                           3  /  max       start
838        //#                       K = | /  ----------------
839        //#                           |/           C
840        //#
841        //#                                Figure 2
842        //#
843        //# where _cwnd_(start)_ is the congestion window at the beginning of the
844        //# current congestion avoidance stage.
845        self.k =
846            Duration::from_secs_f32(((self.w_max - self.bytes_to_packets(cwnd_start)) / C).cbrt());
847
848        cwnd_start
849    }
850
851    //= https://www.rfc-editor.org/rfc/rfc8312#section-4.8
852    //# In the case when CUBIC runs the hybrid slow start [HR08], it may exit
853    //# the first slow start without incurring any packet loss and thus W_max
854    //# is undefined.  In this special case, CUBIC switches to congestion
855    //# avoidance and increases its congestion window size using Eq. 1, where
856    //# t is the elapsed time since the beginning of the current congestion
857    //# avoidance, K is set to 0, and W_max is set to the congestion window
858    //# size at the beginning of the current congestion avoidance.
859    #[inline]
860    fn on_slow_start_exit(&mut self, cwnd: f32) {
861        self.w_max = self.bytes_to_packets(cwnd);
862
863        // We are currently at the w_max, so set k to zero indicating zero
864        // seconds to reach the max
865        self.k = Duration::from_secs(0);
866    }
867
868    //= https://www.rfc-editor.org/rfc/rfc9002#section-7.2
869    //# The minimum congestion window is the smallest value the congestion
870    //# window can attain in response to loss, an increase in the peer-
871    //# reported ECN-CE count, or persistent congestion.  The RECOMMENDED
872    //# value is 2 * max_datagram_size.
873    #[inline]
874    fn minimum_window(&self) -> f32 {
875        2.0 * self.max_datagram_size as f32
876    }
877
878    #[inline]
879    fn bytes_to_packets(&self, bytes: f32) -> f32 {
880        bytes / self.max_datagram_size as f32
881    }
882}
883
884#[non_exhaustive]
885#[derive(Debug, Default)]
886pub struct Endpoint {
887    app_settings: ApplicationSettings,
888}
889
890impl congestion_controller::Endpoint for Endpoint {
891    type CongestionController = CubicCongestionController;
892
893    fn new_congestion_controller(
894        &mut self,
895        path_info: congestion_controller::PathInfo,
896    ) -> Self::CongestionController {
897        CubicCongestionController::new(path_info.max_datagram_size, self.app_settings)
898    }
899}
900
901pub mod builder {
902    use super::{ApplicationSettings, Endpoint};
903
904    /// Build the congestion controller endpoint with application provided overrides
905    #[derive(Default)]
906    pub struct Builder {
907        initial_congestion_window: Option<u32>,
908    }
909
910    impl Builder {
911        /// Set the initial congestion window in bytes.
912        pub fn with_initial_congestion_window(mut self, initial_congestion_window: u32) -> Self {
913            self.initial_congestion_window = Some(initial_congestion_window);
914            self
915        }
916
917        pub fn build(self) -> Endpoint {
918            let app_settings = ApplicationSettings {
919                initial_congestion_window: self.initial_congestion_window,
920            };
921            Endpoint { app_settings }
922        }
923    }
924}
925
926#[cfg(test)]
927mod tests;
s2n_quic_core/recovery/cubic.rs

s2n_quic_core/recovery/
cubic.rs