s2n_quic_core/recovery/bbr/full_pipe.rs
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5    counter::{Counter, Saturating},
6    recovery::{
7        bandwidth,
8        bandwidth::Bandwidth,
9        bbr::{ApplicationSettings, BbrCongestionController},
10    },
11};
12use num_rational::Ratio;
13
14/// Estimator for determining if BBR has fully utilized its available bandwidth ("filled the pipe")
15#[derive(Debug, Default, Clone)]
16pub(crate) struct Estimator {
17    //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.2
18    //# BBRInitFullPipe():
19    //#  BBR.filled_pipe = false
20    //#  BBR.full_bw = 0
21    //#  BBR.full_bw_count = 0
22
23    //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#2.13
24    //# A boolean that records whether BBR estimates that it has ever
25    //# fully utilized its available bandwidth ("filled the pipe").
26    filled_pipe: bool,
27    //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#2.13
28    //# A recent baseline BBR.max_bw to estimate if BBR has "filled the pipe" in Startup.
29    full_bw: Bandwidth,
30    //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#2.13
31    //# The number of non-app-limited round trips without large increases in BBR.full_bw.
32    full_bw_count: Counter<u8, Saturating>,
33    /// The number of rounds where the ECN CE markings exceed ECN_THRESH
34    ecn_ce_rounds: Counter<u8, Saturating>,
35}
36
37impl Estimator {
38    /// Returns true if BBR estimates that is has ever fully utilized its available bandwidth
39    #[inline]
40    pub fn filled_pipe(&self) -> bool {
41        self.filled_pipe
42    }
43
44    /// Called on each new BBR round
45    #[inline]
46    pub fn on_round_start(
47        &mut self,
48        rate_sample: bandwidth::RateSample,
49        max_bw: Bandwidth,
50        ecn_ce_count_too_high: bool,
51    ) {
52        if self.filled_pipe {
53            return;
54        }
55
56        self.filled_pipe = self.bandwidth_plateaued(rate_sample, max_bw)
57            || self.excessive_explicit_congestion(ecn_ce_count_too_high);
58    }
59
60    /// Called on each new loss round
61    ///
62    /// Excessive inflight is checked at the end of a loss round, not a regular round, as done
63    /// in tcp_bbr2.c/bbr2_check_loss_too_high_in_startup
64    ///
65    /// See https://github.com/google/bbr/blob/1a45fd4faf30229a3d3116de7bfe9d2f933d3562/net/ipv4/tcp_bbr2.c#L2133
66    #[inline]
67    pub fn on_loss_round_start(
68        &mut self,
69        rate_sample: bandwidth::RateSample,
70        loss_bursts_in_round: u8,
71        max_datagram_size: u16,
72        app_settings: &ApplicationSettings,
73    ) {
74        if self.filled_pipe {
75            return;
76        }
77
78        self.filled_pipe = self.excessive_inflight(
79            rate_sample,
80            loss_bursts_in_round,
81            max_datagram_size,
82            app_settings,
83        );
84    }
85
86    /// Determines if the rate of increase of bandwidth has decreased enough to estimate the
87    /// available bandwidth has been fully utilized.
88    ///
89    /// Based on bbr_check_full_bw_reached in tcp_bbr2.c
90    #[inline]
91    fn bandwidth_plateaued(
92        &mut self,
93        rate_sample: bandwidth::RateSample,
94        max_bw: Bandwidth,
95    ) -> bool {
96        //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.2
97        //# BBRCheckStartupFullBandwidth():
98        //#   if BBR.filled_pipe or
99        //#     !BBR.round_start or rs.is_app_limited
100        //#    return  /* no need to check for a full pipe now */
101        //#   if (BBR.max_bw >= BBR.full_bw * 1.25)  /* still growing? */
102        //#     BBR.full_bw = BBR.max_bw    /* record new baseline level */
103        //#     BBR.full_bw_count = 0
104        //#   return
105        //#   BBR.full_bw_count++ /* another round w/o much growth */
106        //#   if (BBR.full_bw_count >= 3)
107        //#     BBR.filled_pipe = true
108
109        //# If BBR notices that there are several (three) rounds where attempts to double
110        //# the delivery rate actually result in little increase (less than 25 percent),
111        //# then it estimates that it has reached BBR.max_bw, sets BBR.filled_pipe to true,
112        //# exits Startup and enters Drain.
113        const DELIVERY_RATE_INCREASE: Ratio<u64> = Ratio::new_raw(5, 4); // 1.25
114        const BANDWIDTH_PLATEAU_ROUND_COUNT: u8 = 3;
115
116        if rate_sample.is_app_limited {
117            //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.2
118            //# Once per round trip, upon an ACK that acknowledges new data, and when
119            //# the delivery rate sample is not application-limited (see [draft-
120            //# cheng-iccrg-delivery-rate-estimation]), BBR runs the "full pipe" estimator
121            return false;
122        }
123
124        if max_bw >= self.full_bw * DELIVERY_RATE_INCREASE {
125            // still growing?
126            self.full_bw = max_bw; // record new baseline level
127            self.full_bw_count = Counter::default(); // restart the count
128            return false;
129        }
130
131        /* another round w/o much growth */
132        self.full_bw_count += 1;
133
134        // Bandwidth has plateaued if the number of rounds without much growth
135        // reaches `BANDWIDTH_PLATEAU_ROUND_COUNT`
136        self.full_bw_count >= BANDWIDTH_PLATEAU_ROUND_COUNT
137    }
138
139    /// Determines if inflight has been too high (due to either loss or ECN markings) and enough
140    /// distinct loss bursts have been observed to estimate the available bandwidth has been fully utilized.
141    #[inline]
142    fn excessive_inflight(
143        &mut self,
144        rate_sample: bandwidth::RateSample,
145        loss_bursts_in_round: u8,
146        max_datagram_size: u16,
147        app_settings: &ApplicationSettings,
148    ) -> bool {
149        //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.3
150        //# A second method BBR uses for estimating the bottleneck is full is by looking at sustained
151        //# packet losses Specifically for a case where the following criteria are all met:
152        //#
153        //#    *  The connection has been in fast recovery for at least one full round trip.
154        //#    *  The loss rate over the time scale of a single full round trip exceeds BBRLossThresh (2%).
155
156        //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.3
157        //= type=exception
158        //= reason=Chromium and Linux TCP BBRv2 both use 8 lost bursts in a round trip
159        //#    *  There are at least BBRStartupFullLossCnt=3 discontiguous sequence ranges lost in that round trip.
160
161        // See https://github.com/google/bbr/blob/1a45fd4faf30229a3d3116de7bfe9d2f933d3562/net/ipv4/tcp_bbr2.c#L2325-L2329
162        // and https://source.chromium.org/chromium/chromium/src/+/main:net/third_party/quiche/src/quiche/quic/core/quic_protocol_flags_list.h;l=135;bpv=1;bpt=0
163        const STARTUP_FULL_LOSS_COUNT: u8 = 8;
164
165        //= https://tools.ietf.org/id/draft-cardwell-iccrg-bbr-congestion-control-02#4.3.1.3
166        //= type=exception
167        //= reason=The BBRv2 RFC reference to "fast recovery" here is more applicable to TCP.
168        //#    *  The connection has been in fast recovery for at least one full round trip.
169
170        // The BBRv2 RFC reference to "fast recovery" here seems more applicable to TCP, rather than
171        // QUIC. Instead, we just consider the loss rate and number of loss bursts over the round trip.
172        // This is more in line with the Chromium BBRv2 implementation.
173        // See: https://source.chromium.org/chromium/chromium/src/+/main:net/third_party/quiche/src/quiche/quic/core/congestion_control/bbr2_startup.cc;l=104
174
175        if loss_bursts_in_round < STARTUP_FULL_LOSS_COUNT {
176            // is_inflight_too_high returns true when ECN CE markings exceed the threshold, even
177            // if the loss burst count is below the threshold. During startup, excessive ECN CE
178            // is separately checked over multiple rounds, so return immediately if we have
179            // not seen enough loss bursts. This follows the Linux TCP BBRv2 implementation
180            // See https://github.com/google/bbr/blob/1a45fd4faf30229a3d3116de7bfe9d2f933d3562/net/ipv4/tcp_bbr2.c#L2150
181            return false;
182        }
183
184        BbrCongestionController::is_inflight_too_high(
185            rate_sample,
186            max_datagram_size,
187            loss_bursts_in_round,
188            STARTUP_FULL_LOSS_COUNT,
189            app_settings,
190        )
191    }
192
193    /// Determines if enough consecutive rounds of explicit congestion have been encountered that we
194    /// can estimate the available bandwidth has been fully utilized.
195    ///
196    /// Based on bbr2_check_ecn_too_high_in_startup from https://github.com/google/bbr/blob/1a45fd4faf30229a3d3116de7bfe9d2f933d3562/net/ipv4/tcp_bbr2.c#L1372
197    fn excessive_explicit_congestion(&mut self, ecn_ce_count_too_high: bool) -> bool {
198        // Startup is exited if the number of consecutive round trips with ECN CE markings above
199        // the ECN_THRESH exceed this value
200        // Value from https://github.com/google/bbr/blob/1a45fd4faf30229a3d3116de7bfe9d2f933d3562/net/ipv4/tcp_bbr2.c#L2334
201        const STARTUP_FULL_ECN_COUNT: u8 = 2;
202
203        if ecn_ce_count_too_high {
204            self.ecn_ce_rounds += 1;
205        } else {
206            self.ecn_ce_rounds = Counter::default();
207        }
208
209        self.ecn_ce_rounds >= STARTUP_FULL_ECN_COUNT
210    }
211
212    #[cfg(test)]
213    pub fn set_filled_pipe_for_test(&mut self, filled_pipe: bool) {
214        self.filled_pipe = filled_pipe;
215    }
216}
217
/// Unit tests for the full pipe `Estimator`, covering each of the three exit conditions:
/// bandwidth plateau, excessive inflight in a loss round, and consecutive ECN CE rounds.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        path::MINIMUM_MAX_DATAGRAM_SIZE,
        recovery::{bandwidth::RateSample, bbr::full_pipe},
    };
    use core::time::Duration;

    #[test]
    fn bandwidth_plateau() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample::default();
        let mut max_bw = Bandwidth::new(1000, Duration::from_secs(1));
        // The first round records the initial baseline
        fp_estimator.on_round_start(rate_sample, max_bw, false);

        // Grow by 25% once, then report the same bandwidth for 3 rounds: the first of these
        // rounds records the new baseline, the following two count as rounds without growth
        max_bw = max_bw * Ratio::new(5, 4); // 5/4 = 125%
        for _ in 0..3 {
            fp_estimator.on_round_start(rate_sample, max_bw, false);
        }
        // The pipe has not been filled yet since only two rounds without growth have been seen
        assert!(!fp_estimator.filled_pipe());

        // One more round with 24% growth, not growing fast enough to continue
        max_bw = max_bw * Ratio::new(31, 25); // 31/25 = 124%
        fp_estimator.on_round_start(rate_sample, max_bw, false);
        // The pipe is considered full after the third round without enough growth
        assert!(fp_estimator.filled_pipe());
    }

    #[test]
    fn bandwidth_plateau_app_limited() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample {
            is_app_limited: true,
            ..Default::default()
        };
        let max_bw = Bandwidth::new(1000, Duration::from_secs(1));

        // No growth, but app limited
        for _ in 0..3 {
            fp_estimator.on_round_start(rate_sample, max_bw, false);
        }

        // The pipe has not been filled yet since we were app limited
        assert!(!fp_estimator.filled_pipe());
    }

    #[test]
    fn excessive_inflight_due_to_loss() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample {
            // Set app_limited to true to ignore bandwidth plateau check
            is_app_limited: true,
            // More than 2% bytes lost
            bytes_in_flight: 1000,
            lost_bytes: 21,
            ..Default::default()
        };

        // Only 7 loss bursts, not enough to be considered excessive loss
        fp_estimator.on_loss_round_start(
            rate_sample,
            7,
            MINIMUM_MAX_DATAGRAM_SIZE,
            &Default::default(),
        );
        // The pipe has not been filled yet since there were only 7 loss bursts
        assert!(!fp_estimator.filled_pipe());

        // 8 loss bursts, enough to be considered excessive loss
        fp_estimator.on_loss_round_start(
            rate_sample,
            8,
            MINIMUM_MAX_DATAGRAM_SIZE,
            &Default::default(),
        );
        // The pipe has been filled due to loss
        assert!(fp_estimator.filled_pipe());
    }

    #[test]
    fn excessive_inflight_due_to_ecn_ce() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample {
            // Set app_limited to true to ignore bandwidth plateau check
            is_app_limited: true,
            // >= ECN_THRESH (50%) of packets had ECN CE markings
            ecn_ce_count: 5,
            delivered_bytes: 9 * MINIMUM_MAX_DATAGRAM_SIZE as u64,
            ..Default::default()
        };

        // Only 7 loss bursts, not enough for the loss round check to run
        fp_estimator.on_loss_round_start(
            rate_sample,
            7,
            MINIMUM_MAX_DATAGRAM_SIZE,
            &Default::default(),
        );
        // High ECN CE markings alone must not fill the pipe in a loss round that has
        // too few loss bursts
        assert!(!fp_estimator.filled_pipe());

        // 8 loss bursts, so inflight is now evaluated and the ECN CE markings count
        fp_estimator.on_loss_round_start(
            rate_sample,
            8,
            MINIMUM_MAX_DATAGRAM_SIZE,
            &Default::default(),
        );
        // The pipe has been filled due to ECN
        assert!(fp_estimator.filled_pipe());
    }

    #[test]
    fn excessive_inflight_loss_rate_too_low() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample {
            // Set app_limited to true to ignore bandwidth plateau check
            is_app_limited: true,
            // 2 of 1000 bytes lost (0.2%), below the 2% loss threshold
            // NOTE(review): a value of 20 would exercise the boundary itself; confirm the
            // comparison used by is_inflight_too_high before changing it
            bytes_in_flight: 1000,
            lost_bytes: 2,
            ..Default::default()
        };
        // 8 loss bursts, enough to be considered excessive loss
        fp_estimator.on_loss_round_start(
            rate_sample,
            8,
            MINIMUM_MAX_DATAGRAM_SIZE,
            &Default::default(),
        );
        // The pipe has not been filled yet since the loss rate was not high enough
        assert!(!fp_estimator.filled_pipe());
    }

    #[test]
    fn excessive_explicit_congestion() {
        let mut fp_estimator = full_pipe::Estimator::default();
        let rate_sample = RateSample {
            // Set app_limited to true to ignore bandwidth plateau check
            is_app_limited: true,
            ..Default::default()
        };

        let max_bw = Bandwidth::new(1000, Duration::from_secs(1));

        fp_estimator.on_round_start(rate_sample, max_bw, true);
        // The pipe has not been filled yet since there was only one round with high ECN CE markings
        assert!(!fp_estimator.filled_pipe());

        fp_estimator.on_round_start(rate_sample, max_bw, false);
        fp_estimator.on_round_start(rate_sample, max_bw, true);
        // The pipe has not been filled yet since the low ecn rate sample reset the count,
        // ie the high ecn rate samples were not contiguous
        assert!(!fp_estimator.filled_pipe());

        fp_estimator.on_round_start(rate_sample, max_bw, true);
        // After two consecutive rounds of high ECN markings, the pipe is full
        assert!(fp_estimator.filled_pipe());
    }
}
377}