phantom_protocol/transport/
stream.rs

1//! Phantom Protocol - Stream Management
2//!
3//! Independently-flow-controlled, reliability-segmented data channels multiplexed
4//! within one session. Each [`Stream`] owns its own send/receive buffers, gap-free
5//! reliable offset space (A.5), SACK-driven loss detection (RFC 9002), RFC-6298 RTO
6//! estimator, and credit-based flow-control windows. Per-stream sequencing means a
7//! stall or loss on one stream does not head-of-line-block any other stream (HoL
8//! blocking still applies *within* a stream — reliable data is delivered strictly
9//! in send order via `accept_in_order`).
10
11use crate::errors::CoreError;
12use crate::transport::sack::Sack;
13use crate::transport::types::{SequenceNumber, StreamId};
14
15use bytes::Bytes;
16use std::collections::VecDeque;
17use std::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering};
18use std::sync::Arc;
19use std::time::Duration;
20use tokio::sync::{Mutex, Notify, Semaphore};
21
/// Number of permits in each stream's send-backpressure semaphore
/// (`Stream::new` sizes `send_semaphore` with it). `send_reliable` takes one
/// permit per queued reliable segment and forgets it, so at most this many
/// reliable segments can be queued before `send_reliable` has to wait for
/// permits to be handed back (the release site is outside this view).
const MAX_PENDING_PACKETS: usize = 1024;

/// Upper bound on out-of-order segments held for reassembly per stream. In
/// practice the flow-control window bounds in-flight (hence reorderable) data far
/// below this; a peer that floods past its window with huge gaps is refused here
/// (the refused segment is NOT recorded as received, so it is not SACKed and the
/// sender retransmits it — no SACK-without-data hazard, bounded memory).
const MAX_RECV_REORDER: usize = 2048;

/// Per-stream byte budget for the out-of-order reorder buffer (H-3), tied to the flow-control
/// window. A compliant peer keeps in-flight (hence reorderable) data within ~one
/// [`INITIAL_STREAM_WINDOW`]; the 2× headroom absorbs a boundary segment. A future hole that
/// would push the buffered total past this is refused (dropped → retransmitted via the
/// "refused segment is not SACKed" contract), so per-stream reorder memory is bounded
/// regardless of the per-entry frame size (~253 KiB UDP / 4 MiB TCP) — the entry cap alone is
/// not, since one entry can dwarf the window.
pub const MAX_RECV_REORDER_BYTES: usize = 2 * INITIAL_STREAM_WINDOW as usize;

/// RFC 9002 §6.1.1 packet-threshold: a still-unacked segment is declared lost
/// once a segment at least this many offsets *newer* has been SACK-acked.
const PACKET_THRESHOLD: u32 = 3;

/// Initial per-stream send window — caps how many bytes the local
/// side will put on the wire before receiving a `WINDOW_UPDATE` from
/// the peer. 64 KiB matches QUIC's stream initial-window default.
/// Also the starting value of both per-stream window counters in `Stream::new`.
pub const INITIAL_STREAM_WINDOW: u32 = 64 * 1024;

/// Hard ceiling on the credit-based send window. `WINDOW_UPDATE` frames add
/// *relative* credit; this caps the accumulated window so a peer that floods
/// inflated credits cannot overflow the counter. A compliant peer never grants
/// more than ~one [`INITIAL_STREAM_WINDOW`] of outstanding credit, so the cap is
/// only a misbehaving-peer guard (the receiver's own delivery HARD_CAP is the
/// real bound on buffering).
pub const MAX_SEND_WINDOW: u32 = 8 * INITIAL_STREAM_WINDOW;
56
/// Lifecycle state of a [`Stream`]: which of the two directions (local send,
/// remote send) are still open.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StreamState {
    /// Stream is open for both directions
    Open,
    /// Local side has finished sending (the remote side may still send)
    HalfClosedLocal,
    /// Remote side has finished sending (the local side may still send)
    HalfClosedRemote,
    /// Stream is fully closed (both directions finished)
    Closed,
}
69
/// One reliable segment held in a stream's send buffer. `send_reliable` queues
/// it unsent (`sent_at: None`, `retries: 0`, `lost: false`); `poll_send` hands
/// it out for transmission and retransmission.
#[derive(Debug)]
struct PendingData {
    /// Gap-free per-stream reliable-data offset — the reassembly / SACK / loss-
    /// detection key (A.5). Carried in the AEAD plaintext so the receiver can
    /// deliver reliable data strictly in send order even when `sequence` has
    /// control-frame holes. Stable across retransmits.
    stream_offset: SequenceNumber,
    /// Payload bytes of the segment.
    data: Bytes,
    /// Instant of the most recent (re)transmission; `None` while the segment is
    /// still unsent (`poll_send` treats `None` as "never transmitted").
    sent_at: Option<tokio::time::Instant>,
    /// Retransmission counter; `poll_send` increments it on every fast or
    /// RTO retransmit.
    // NOTE(review): the reason for `allow(dead_code)` is not visible here — the
    // field is written by `poll_send`; confirm whether anything still reads it
    // and drop the allow if so.
    #[allow(dead_code)]
    retries: u32,
    /// Flagged lost by the SACK-driven loss detector (RFC 9002 packet- or
    /// time-threshold, L1-B). `poll_send`'s Pass-0 fast-retransmits it ahead of
    /// cwnd/window, then clears the flag. Distinct from the RTO pass (Pass-1).
    lost: bool,
}
87
/// One reliable segment retired by [`Stream::on_sack`] — a segment whose
/// sequence a received SACK covered and which has now been removed from the
/// send buffer. Plain `Copy` data handed to the caller for RTT / congestion
/// accounting.
#[derive(Debug, Clone, Copy)]
pub struct RetiredSegment {
    /// When the segment was last (re)transmitted, if it had been sent at all.
    /// `None` means the segment was acknowledged before `poll_send` ever stamped
    /// it (e.g. a duplicate cumulative SACK) — no RTT sample is taken.
    pub sent_at: Option<tokio::time::Instant>,
    /// On-wire payload size of the segment (presumably in bytes — confirm
    /// against `on_sack`).
    pub size: u64,
    /// True if the segment had been retransmitted at least once (`retries > 0`).
    /// Per Karn's algorithm, the caller must NOT sample RTT from such a segment.
    pub was_retransmit: bool,
}
103
/// One segment newly declared lost by [`Stream::on_sack`]'s RFC-9002 loss
/// detector (L1-B) — still buffered, now flagged for fast-retransmit.
/// Reported to the caller through [`SackResult::lost`].
#[derive(Debug, Clone, Copy)]
pub struct LostSegment {
    /// Gap-free reliable offset of the lost segment.
    pub stream_offset: SequenceNumber,
    /// On-wire payload size — the caller reports it to congestion control via
    /// `Session::on_packet_lost`.
    pub size: u64,
}
114
/// Outcome of processing a received SACK against the send buffer.
/// The `Default` value (both lists empty) means the SACK changed nothing.
#[derive(Debug, Default)]
pub struct SackResult {
    /// The segments newly retired by this SACK (were in the send buffer, now
    /// removed). The caller feeds each into congestion control / the RTT
    /// estimator. Empty if the SACK acknowledged nothing still buffered (e.g. a
    /// duplicate or stale ACK).
    pub retired: Vec<RetiredSegment>,
    /// Segments newly declared lost (packet- or time-threshold, RFC 9002) by this
    /// SACK — still buffered, now flagged for Pass-0 fast-retransmit. The caller
    /// feeds each into `Session::on_packet_lost` (the real BBR loss signal).
    pub lost: Vec<LostSegment>,
}
128
129impl SackResult {
130    /// The gap-free offsets of the segments newly declared lost, ascending.
131    pub fn lost_offsets(&self) -> Vec<SequenceNumber> {
132        self.lost.iter().map(|l| l.stream_offset).collect()
133    }
134}
135
/// One segment handed back by [`Stream::poll_send`] for transmission.
#[derive(Debug, Clone)]
pub struct OutboundSegment {
    /// Gap-free per-stream reliable-data offset (A.5). The send path prepends it
    /// (big-endian u32) to the AEAD plaintext of a reliable segment so the
    /// receiver reassembles in send order regardless of control-frame holes.
    /// Meaningless for unreliable segments (the send path does not prefix those);
    /// `poll_send` sets it to `0` for them.
    pub stream_offset: SequenceNumber,
    /// Payload bytes.
    pub data: Bytes,
    /// Whether the segment is on the reliable (ACK-tracked) path.
    pub reliable: bool,
    /// True when this is a retransmission rather than a first transmission.
    /// `poll_send` sets it both for a SACK-flagged fast retransmit (Pass 0) and
    /// for an RTO expiry (Pass 1) — the caller reports it to congestion control
    /// as a loss.
    // NOTE(review): a Pass-0 segment was presumably already reported through
    // `SackResult::lost`; confirm the caller does not count that loss twice.
    pub retransmit: bool,
}
152
/// Per-stream retransmission-timeout estimator following RFC 6298: tracks a
/// smoothed RTT and its variation from measured samples, and doubles the
/// timeout on each consecutive timeout until a fresh sample arrives.
#[derive(Debug)]
struct RtoEstimator {
    /// Smoothed RTT; stays `None` until a first sample is fed in.
    srtt: Option<Duration>,
    /// Estimated RTT variation.
    rttvar: Duration,
    /// Consecutive-timeout counter: the base RTO is multiplied by
    /// `2^backoff_shift`.
    backoff_shift: u32,
}

impl RtoEstimator {
    /// RTO used while no RTT has been measured (RFC 6298 (2.1)).
    const INITIAL_RTO: Duration = Duration::from_secs(1);
    /// Lower clamp — the RFC's 1s minimum is too conservative for a low-latency
    /// transport.
    const MIN_RTO: Duration = Duration::from_millis(200);
    /// Upper clamp, so a stalled path can't push the timer arbitrarily high.
    const MAX_RTO: Duration = Duration::from_secs(60);
    /// Clock-granularity term `G` of RFC 6298 (2.3).
    const GRANULARITY: Duration = Duration::from_millis(1);
    /// Largest backoff exponent (2^6 = 64×).
    const MAX_BACKOFF_SHIFT: u32 = 6;

    /// Estimator with no measurement and no backoff.
    fn new() -> Self {
        Self {
            backoff_shift: 0,
            rttvar: Duration::ZERO,
            srtt: None,
        }
    }

    /// Fold in one RTT measurement `r`. Per Karn's algorithm the caller must
    /// only pass samples from segments that were never retransmitted.
    fn on_rtt_sample(&mut self, r: Duration) {
        if let Some(previous) = self.srtt {
            // RFC 6298 (2.3): RTTVAR = 3/4·RTTVAR + 1/4·|SRTT − R| (using the
            // *old* SRTT), then SRTT = 7/8·SRTT + 1/8·R.
            let deviation = previous.abs_diff(r);
            self.rttvar = (self.rttvar * 3 + deviation) / 4;
            self.srtt = Some((previous * 7 + r) / 8);
        } else {
            // RFC 6298 (2.2): the first sample seeds both terms.
            self.rttvar = r / 2;
            self.srtt = Some(r);
        }
        // Any fresh measurement cancels accumulated backoff.
        self.backoff_shift = 0;
    }

    /// Current timeout: `SRTT + max(G, 4·RTTVAR)` (or the initial RTO before
    /// any sample), scaled by the backoff factor and clamped to
    /// `[MIN_RTO, MAX_RTO]`.
    fn rto(&self) -> Duration {
        let unscaled = self.srtt.map_or(Self::INITIAL_RTO, |srtt| {
            let variance_term = (self.rttvar * 4).max(Self::GRANULARITY);
            srtt + variance_term
        });
        // RFC 6298 (5.5) exponential backoff; an overflowing product is
        // treated as the ceiling.
        let factor = 1u32 << self.backoff_shift;
        unscaled
            .checked_mul(factor)
            .unwrap_or(Self::MAX_RTO)
            .clamp(Self::MIN_RTO, Self::MAX_RTO)
    }

    /// Record a retransmission timeout: one more doubling of the RTO
    /// (RFC 6298 (5.5)), capped at `MAX_BACKOFF_SHIFT`.
    fn on_timeout(&mut self) {
        self.backoff_shift = std::cmp::min(self.backoff_shift + 1, Self::MAX_BACKOFF_SHIFT);
    }

    /// Return to the freshly-constructed state (Phase 4 / QUIC §9.4): after a
    /// migration path switch the old network's RTT must not carry over. Wired
    /// by the P4.2 migration switch (`Stream::reset_rto`).
    fn reset(&mut self) {
        *self = Self::new();
    }
}
234
#[cfg(test)]
mod rto_tests {
    use super::RtoEstimator;
    use std::time::Duration;

    /// Shorthand for a millisecond `Duration`.
    fn ms(millis: u64) -> Duration {
        Duration::from_millis(millis)
    }

    #[test]
    fn follows_rfc6298_srtt_rttvar() {
        let mut estimator = RtoEstimator::new();
        // Before any sample the RTO is the 1s initial value.
        assert_eq!(estimator.rto(), Duration::from_secs(1));
        // R = 100ms seeds SRTT = 100 and RTTVAR = 50, so RTO = 100 + 4·50 = 300ms.
        estimator.on_rtt_sample(ms(100));
        assert_eq!(estimator.rto(), ms(300));
        // Repeating the same sample decays RTTVAR towards zero; RTO approaches
        // SRTT and is held up by the 200ms floor.
        (0..50).for_each(|_| estimator.on_rtt_sample(ms(100)));
        assert_eq!(estimator.rto(), ms(200));
    }

    #[test]
    fn backoff_doubles_and_fresh_sample_resets() {
        let mut estimator = RtoEstimator::new();
        estimator.on_rtt_sample(ms(100));
        assert_eq!(estimator.rto(), ms(300));
        // Each timeout doubles the RTO.
        estimator.on_timeout();
        assert_eq!(estimator.rto(), ms(600));
        estimator.on_timeout();
        assert_eq!(estimator.rto(), ms(1200));
        // This second sample shrinks RTTVAR 50ms → 37.5ms, giving
        // RTO = 100 + 4·37.5 = 250ms. What matters is that the backoff is
        // gone: a lingering shift of 2 would yield 1000ms.
        estimator.on_rtt_sample(ms(100));
        assert_eq!(estimator.rto(), ms(250));
    }

    #[test]
    fn reset_clears_estimate_and_backoff() {
        let mut estimator = RtoEstimator::new();
        // Establish an SRTT, then back the RTO off once: 300ms → 600ms.
        estimator.on_rtt_sample(ms(100));
        estimator.on_timeout();
        assert_eq!(estimator.rto(), ms(600));
        // Phase 4 / QUIC §9.4: a migration path switch resets the estimate so
        // the new network's RTT is measured from scratch (a stale tiny RTO
        // would cause a spurious-retransmit storm on the new path).
        estimator.reset();
        // Back to INITIAL_RTO with no backoff applied.
        assert_eq!(estimator.rto(), Duration::from_secs(1));
    }
}
286
/// Stream - multiplexed data channel within a session.
///
/// Holds the per-stream send/receive buffers, the reliable-offset cursors, the
/// credit-based flow-control counters and the RTO estimator. Shared state is
/// behind async mutexes (buffers, state), atomics (counters, flags) or plain
/// sync mutexes (RTO estimator, last-receive timestamp).
pub struct Stream {
    /// Stream identifier
    id: StreamId,
    /// Current state
    state: Mutex<StreamState>,
    /// Gap-free per-stream reliable-data offset counter (A.5). Only reliable data
    /// consumes it, so it has no control-frame holes; it is the reassembly / SACK
    /// key carried in the reliable-data AEAD plaintext. Holds the next offset to
    /// assign; `u32::MAX` is the exhaustion sentinel (see `next_reliable_offset`).
    reliable_offset: AtomicU32,
    /// Next expected receive **stream offset** (gap-free reassembly cursor, A.5).
    recv_sequence: AtomicU32,
    /// Send buffer (reliable segments queued by `send_reliable`, sent and
    /// retransmitted by `poll_send`)
    send_buffer: Mutex<VecDeque<PendingData>>,
    /// Unreliable send buffer (fire and forget); drained by `poll_send` ahead
    /// of any reliable data
    unreliable_buffer: Mutex<VecDeque<Bytes>>,
    /// Receive buffer (out-of-order data). Each entry is one cursor position
    /// `(sequence, payloads)`; `payloads` is normally a single reliable frame but
    /// carries a COALESCED bundle's sub-payloads when several share one sequence.
    recv_buffer: Mutex<VecDeque<(SequenceNumber, Vec<Bytes>)>>,
    /// Total payload bytes currently held in `recv_buffer` (H-3). Mutated only under the
    /// `recv_buffer` lock (in `accept_in_order`), so it stays exactly in step with the
    /// buffer; an `AtomicUsize` only so it can be read lock-free for stats/tests. Bounds the
    /// out-of-order reorder buffer by *bytes*, not entries, since one entry can be ~253 KiB
    /// (UDP) / 4 MiB (TCP).
    recv_buffer_bytes: AtomicUsize,
    /// Ordered receive queue (ready for application)
    recv_ready: Mutex<VecDeque<Bytes>>,
    /// Notify when data is ready to read
    recv_notify: Notify,
    /// Whether stream is finished locally
    local_finished: AtomicBool,
    /// Whether stream is finished remotely
    remote_finished: AtomicBool,
    /// Priority (higher = more important)
    priority: AtomicU32,
    /// Backpressure semaphore, created with `MAX_PENDING_PACKETS` permits.
    /// `send_reliable` acquires one permit per queued segment and forgets it,
    /// so permits must be added back explicitly (the release site is outside
    /// this view — presumably when a segment is retired).
    send_semaphore: Arc<Semaphore>,
    /// Bytes the **peer** has granted us to send — decremented as we
    /// emit payload bytes, replenished by inbound `WINDOW_UPDATE`
    /// frames (Phase 4.3). When it hits zero, `poll_send` stalls
    /// until the next `WINDOW_UPDATE`. Never raised above
    /// [`MAX_SEND_WINDOW`] by `apply_peer_window_update`.
    peer_send_window: AtomicU32,
    /// Running total of receive credit the local side has extended to the
    /// peer: starts at [`INITIAL_STREAM_WINDOW`] and grows by every credit
    /// returned from `record_app_consumed`. Informational (stats) only — the
    /// `WINDOW_UPDATE` frame carries the *relative* credit, not this value.
    local_recv_window: AtomicU32,
    /// Bytes the application has consumed that have not yet been turned into
    /// credit. `record_app_consumed` accumulates here and drains the
    /// accumulator once it reaches half of [`INITIAL_STREAM_WINDOW`] (avoids
    /// flooding the wire with tiny updates).
    bytes_since_last_update: AtomicU32,
    /// Pending **relative** flow-control credit to advertise in a
    /// `WINDOW_UPDATE`, staged by the receive **delivery** task (which credits
    /// the window on *real* app consumption) and flushed by the **send loop** —
    /// the sole *outbound* writer, so the encrypted control frame is sealed by the
    /// same task that stamps every data packet, under the epoch live at flush
    /// time. (The epoch itself has TWO writers — the send loop's own `rekey()` and
    /// the receive task's authenticated forward catch-up in
    /// `decrypt_packet_accepting_rekey` — but both serialise through the session's
    /// `rekey_lock`, so the send loop always seals under a consistent key.)
    /// Credits accumulate additively, so several grants between two flushes are
    /// never lost. `0` = nothing pending.
    pending_window_update: AtomicU32,
    /// RFC 6298 retransmission-timeout estimator. A plain (sync) mutex: it is
    /// updated only from the serial ACK path and read by `poll_send`, and the
    /// guard is never held across an `.await`. Every accessor recovers from a
    /// poisoned lock by taking the inner value.
    rto: std::sync::Mutex<RtoEstimator>,
    /// Receive instant of the most recent reliable data packet, used to populate
    /// the SACK's `ack_delay_us` (`now − recv_at`). A plain sync mutex; the guard
    /// is never held across an `.await`.
    last_data_recv_at: std::sync::Mutex<Option<tokio::time::Instant>>,
}
359
360impl Stream {
361    /// Create a new stream
362    pub fn new(id: StreamId) -> Self {
363        Self {
364            id,
365            state: Mutex::new(StreamState::Open),
366            reliable_offset: AtomicU32::new(0),
367            recv_sequence: AtomicU32::new(0),
368            send_buffer: Mutex::new(VecDeque::new()),
369            unreliable_buffer: Mutex::new(VecDeque::new()),
370            recv_buffer: Mutex::new(VecDeque::new()),
371            recv_buffer_bytes: AtomicUsize::new(0),
372            recv_ready: Mutex::new(VecDeque::new()),
373            recv_notify: Notify::new(),
374            local_finished: AtomicBool::new(false),
375            remote_finished: AtomicBool::new(false),
376            priority: AtomicU32::new(0),
377            send_semaphore: Arc::new(Semaphore::new(MAX_PENDING_PACKETS)),
378            peer_send_window: AtomicU32::new(INITIAL_STREAM_WINDOW),
379            local_recv_window: AtomicU32::new(INITIAL_STREAM_WINDOW),
380            bytes_since_last_update: AtomicU32::new(0),
381            pending_window_update: AtomicU32::new(0),
382            rto: std::sync::Mutex::new(RtoEstimator::new()),
383            last_data_recv_at: std::sync::Mutex::new(None),
384        }
385    }
386
387    // ── RFC 6298 retransmission timeout ──
388
389    /// Current retransmission timeout. A poisoned lock is recovered by taking
390    /// the inner value — the RTO is a heuristic, not a correctness invariant.
391    fn current_rto(&self) -> Duration {
392        match self.rto.lock() {
393            Ok(g) => g.rto(),
394            Err(poisoned) => poisoned.into_inner().rto(),
395        }
396    }
397
398    /// Reset the RTT estimator (Phase 4 / QUIC §9.4): a migration path switch lands
399    /// on a different network, so the old RTT must not carry over. A poisoned lock
400    /// is recovered by taking the inner value — the RTO is a heuristic.
401    pub fn reset_rto(&self) {
402        match self.rto.lock() {
403            Ok(mut g) => g.reset(),
404            Err(poisoned) => poisoned.into_inner().reset(),
405        }
406    }
407
408    /// Smoothed RTT estimate, or `None` before the first measurement. Feeds the
409    /// RFC-9002 time-threshold loss detector (L1-B).
410    fn smoothed_rtt(&self) -> Option<Duration> {
411        match self.rto.lock() {
412            Ok(g) => g.srtt,
413            Err(poisoned) => poisoned.into_inner().srtt,
414        }
415    }
416
417    /// Feed a fresh RTT measurement into the RTO estimator.
418    fn record_rtt_sample(&self, rtt: Duration) {
419        let mut g = match self.rto.lock() {
420            Ok(g) => g,
421            Err(poisoned) => poisoned.into_inner(),
422        };
423        g.on_rtt_sample(rtt);
424    }
425
426    /// Tell the RTO estimator a segment timed out (exponential backoff).
427    fn note_rto_timeout(&self) {
428        let mut g = match self.rto.lock() {
429            Ok(g) => g,
430            Err(poisoned) => poisoned.into_inner(),
431        };
432        g.on_timeout();
433    }
434
435    /// Get stream ID
436    pub fn id(&self) -> StreamId {
437        self.id
438    }
439
440    /// Get current state
441    pub async fn state(&self) -> StreamState {
442        *self.state.lock().await
443    }
444
445    /// Get priority
446    pub fn priority(&self) -> u32 {
447        self.priority.load(Ordering::Relaxed)
448    }
449
450    /// Set priority
451    pub fn set_priority(&self, priority: u32) {
452        self.priority.store(priority, Ordering::Relaxed);
453    }
454
455    // ── Flow control (Phase 4.3) ──
456
457    /// Bytes the peer currently allows us to send.
458    pub fn peer_send_window(&self) -> u32 {
459        self.peer_send_window.load(Ordering::Acquire)
460    }
461
462    /// Atomically reserve `n` bytes from the peer's send window.
463    /// Returns `true` if the reservation succeeded (and the window
464    /// was decremented); `false` if the window doesn't have enough
465    /// capacity — caller must wait for a `WINDOW_UPDATE`.
466    pub fn try_consume_send_window(&self, n: u32) -> bool {
467        let mut cur = self.peer_send_window.load(Ordering::Acquire);
468        loop {
469            if cur < n {
470                return false;
471            }
472            match self.peer_send_window.compare_exchange_weak(
473                cur,
474                cur - n,
475                Ordering::AcqRel,
476                Ordering::Acquire,
477            ) {
478                Ok(_) => return true,
479                Err(actual) => cur = actual,
480            }
481        }
482    }
483
484    /// Process an inbound `WINDOW_UPDATE` from the peer. The payload is a
485    /// **relative credit** — the number of bytes the peer's application just
486    /// consumed and is therefore newly willing to receive. We *add* it to the
487    /// send window (saturating at [`MAX_SEND_WINDOW`] so a misbehaving peer's
488    /// inflated credit cannot overflow the counter).
489    ///
490    /// Relative credit (vs. an absolute window) is what makes flow control
491    /// correct for a session of any length: the sender's window is
492    /// `initial + Σ credit_granted − Σ bytes_sent` = `initial + consumed −
493    /// sent`, so the receiver's outstanding (unconsumed) bytes `sent − consumed`
494    /// are bounded by `initial`. An absolute u32 window could not express this
495    /// for sessions exceeding 4 GiB and over-committed the receiver's buffer.
496    pub fn apply_peer_window_update(&self, credit: u32) {
497        let mut cur = self.peer_send_window.load(Ordering::Acquire);
498        loop {
499            let next = cur.saturating_add(credit).min(MAX_SEND_WINDOW);
500            if next == cur {
501                return; // already at the cap; nothing to add
502            }
503            match self.peer_send_window.compare_exchange_weak(
504                cur,
505                next,
506                Ordering::AcqRel,
507                Ordering::Acquire,
508            ) {
509                Ok(_) => return,
510                Err(actual) => cur = actual,
511            }
512        }
513    }
514
515    /// Bytes the local side has granted the peer.
516    pub fn local_recv_window(&self) -> u32 {
517        self.local_recv_window.load(Ordering::Acquire)
518    }
519
520    /// Record that the application has actually consumed `n` bytes from this
521    /// stream (called by the receive *delivery* task on real drainage, not
522    /// on routing). Accumulates the consumed bytes and, once the unreported
523    /// total crosses half the initial window, returns `Some(credit)` — the
524    /// **relative credit** to advertise in a `WINDOW_UPDATE` (the peer *adds*
525    /// it to its send window). The half-window threshold trades update frequency
526    /// against peer stalls.
527    pub fn record_app_consumed(&self, n: u32) -> Option<u32> {
528        let pending = self.bytes_since_last_update.fetch_add(n, Ordering::AcqRel) + n;
529        let threshold = INITIAL_STREAM_WINDOW / 2;
530        if pending >= threshold {
531            // Grant exactly the bytes we accumulated since the last update and
532            // reset the accumulator. Use a CAS-free `fetch_sub` of the granted
533            // amount rather than `store(0)` so a concurrent consume isn't lost.
534            self.bytes_since_last_update
535                .fetch_sub(pending, Ordering::AcqRel);
536            // Keep the (now informational) local_recv_window in step for stats.
537            self.local_recv_window.fetch_add(pending, Ordering::AcqRel);
538            Some(pending)
539        } else {
540            None
541        }
542    }
543
544    /// Stage relative flow-control credit to be flushed by the send loop.
545    /// Called by the receive delivery task after it credits real app
546    /// consumption. Credits **accumulate additively** (saturating at
547    /// `u32::MAX`) rather than overwriting, so several grants landing between
548    /// two send-loop flushes are summed instead of lost — the send loop is the
549    /// single emitter (epoch-safe), and it may run arbitrarily after a grant.
550    pub fn stage_window_update_credit(&self, credit: u32) {
551        let mut cur = self.pending_window_update.load(Ordering::Acquire);
552        loop {
553            let next = cur.saturating_add(credit);
554            if next == cur {
555                return; // nothing to add (zero credit, or already saturated)
556            }
557            match self.pending_window_update.compare_exchange_weak(
558                cur,
559                next,
560                Ordering::AcqRel,
561                Ordering::Acquire,
562            ) {
563                Ok(_) => return,
564                Err(actual) => cur = actual,
565            }
566        }
567    }
568
569    /// Take all staged credit (swaps the slot back to `0`). The send loop calls
570    /// this each drain pass and emits one `WINDOW_UPDATE` carrying the summed
571    /// credit if `Some`.
572    pub fn take_pending_window_update(&self) -> Option<u32> {
573        match self.pending_window_update.swap(0, Ordering::AcqRel) {
574            0 => None,
575            w => Some(w),
576        }
577    }
578
579    /// Assign the next gap-free reliable `stream_offset`, failing closed at `u32`
580    /// exhaustion (T4.5, reviewer §1). The cursor (`reliable_offset`) holds the
581    /// next-to-assign value; the last assignable offset is `u32::MAX - 1` (assigning
582    /// it advances the cursor to the `u32::MAX` exhaustion sentinel). A plain
583    /// `fetch_add(1)` would wrap `u32::MAX` back to `0`, re-issuing offset `0` and
584    /// corrupting reassembly / SACK dedup (a duplicate offset — NOT an AEAD nonce
585    /// reuse, since the nonce is the `u64` packet number). Instead we fail closed,
586    /// mirroring the epoch-saturation guard in [`Session::rekey`]. The CAS loop keeps
587    /// the "never wrap" invariant correct even under a (rare) concurrent caller.
588    fn next_reliable_offset(&self) -> Result<SequenceNumber, CoreError> {
589        loop {
590            let cur = self.reliable_offset.load(Ordering::SeqCst);
591            let next = cur.checked_add(1).ok_or_else(|| {
592                CoreError::StreamError(
593                    "reliable stream offset space exhausted (u32); reconnect required".into(),
594                )
595            })?;
596            if self
597                .reliable_offset
598                .compare_exchange(cur, next, Ordering::SeqCst, Ordering::SeqCst)
599                .is_ok()
600            {
601                return Ok(cur);
602            }
603        }
604    }
605
606    /// Queue data for sending with reliability.
607    ///
608    /// Returns the gap-free `stream_offset` assigned to this chunk (the reassembly
609    /// / SACK key). The wire packet number is assigned later, at send time, by the
610    /// data pump (① — Phase 4). Fails closed with [`CoreError::StreamError`] once the
611    /// `u32` offset space is exhausted (T4.5) — the acquired backpressure permit is
612    /// released on that path so the semaphore accounting stays correct.
613    pub async fn send_reliable(&self, data: Bytes) -> Result<SequenceNumber, CoreError> {
614        // Backpressure: wait until there is space in the buffer.
615        // PANIC-SAFETY: `Semaphore::acquire` only errors after `close()`. The
616        // `send_semaphore` is a private field of this struct, constructed in
617        // `Stream::new` and never closed anywhere in the crate — the variant
618        // is structurally unreachable.
619        #[allow(clippy::expect_used)]
620        let permit = self
621            .send_semaphore
622            .acquire()
623            .await
624            .expect("Semaphore closed");
625
626        // Gap-free reliable-data offset (A.5) — the reassembly / SACK key. Assigned
627        // BEFORE forgetting the permit so a fail-closed exhaustion (`?`) drops the
628        // permit and releases the slot instead of leaking backpressure capacity.
629        let stream_offset = self.next_reliable_offset()?;
630        permit.forget();
631
632        let pending = PendingData {
633            stream_offset,
634            data,
635            sent_at: None,
636            retries: 0,
637            lost: false,
638        };
639
640        self.send_buffer.lock().await.push_back(pending);
641
642        Ok(stream_offset)
643    }
644
645    /// Queue data for unreliable sending. Fire-and-forget; the wire packet number
646    /// is assigned at send time by the data pump (① — Phase 4).
647    pub async fn send_unreliable(&self, data: Bytes) {
648        // Unreliable data does not consume buffer permits.
649        self.unreliable_buffer.lock().await.push_back(data);
650    }
651
652    /// Get the next segment to (re)transmit, or `None` if nothing is due.
653    ///
654    /// `cwnd_budget` is how many bytes of *new* data the congestion window
655    /// currently permits. Retransmissions ignore it — loss recovery must always
656    /// proceed — but a first transmission is withheld (`None`) when it would
657    /// exceed the budget, so the next drain resumes once ACKs free the window.
658    /// Pass `u64::MAX` to disable the limit.
659    pub async fn poll_send(&self, cwnd_budget: u64) -> Option<OutboundSegment> {
660        // Unreliable data is fire-and-forget and not congestion-controlled.
661        if let Some(data) = self.unreliable_buffer.lock().await.pop_front() {
662            return Some(OutboundSegment {
663                // Unreliable segments are not reassembled; offset is unused (the
664                // send path does not prefix it).
665                stream_offset: 0,
666                data,
667                reliable: false,
668                retransmit: false,
669            });
670        }
671
672        let mut buffer = self.send_buffer.lock().await;
673        let now = tokio::time::Instant::now();
674        // Adaptive RFC 6298 timeout (was a fixed 500ms).
675        let timeout = self.current_rto();
676
677        // Pass 0: fast-retransmit a segment the SACK loss detector flagged (RFC
678        // 9002, L1-B). Recovers a loss in ~1 RTT instead of waiting out an RTO.
679        // Like Pass 1 it BYPASSES cwnd/window (loss recovery must always proceed —
680        // the flow-control invariant), but it does NOT back the RTO off (this was a
681        // SACK-detected loss, not a timeout). Clears the flag and marks the segment
682        // retransmitted (ambiguous for RTT — Karn).
683        for pending in buffer.iter_mut() {
684            if pending.lost && pending.sent_at.is_some() {
685                pending.lost = false;
686                pending.sent_at = Some(now);
687                pending.retries += 1;
688                return Some(OutboundSegment {
689                    stream_offset: pending.stream_offset,
690                    data: pending.data.clone(),
691                    reliable: true,
692                    retransmit: true,
693                });
694            }
695        }
696
697        // Pass 1: a timed-out segment (retransmission) — always allowed.
698        for pending in buffer.iter_mut() {
699            if let Some(sent_at) = pending.sent_at {
700                if now.duration_since(sent_at) >= timeout {
701                    pending.sent_at = Some(now);
702                    pending.retries += 1;
703                    // Back the RTO off exponentially for the next attempt.
704                    self.note_rto_timeout();
705                    return Some(OutboundSegment {
706                        stream_offset: pending.stream_offset,
707                        data: pending.data.clone(),
708                        reliable: true,
709                        retransmit: true,
710                    });
711                }
712            }
713        }
714
715        // Pass 2: the next unsent segment, if it fits BOTH the congestion window
716        // AND the peer's advertised flow-control window. In-order: if the head
717        // unsent segment doesn't fit, stop (don't skip). Retransmissions (Pass 1)
718        // bypass both budgets — those bytes were already accounted on first send
719        // (Karn), and loss recovery must always proceed.
720        for pending in buffer.iter_mut() {
721            if pending.sent_at.is_none() {
722                let len = pending.data.len() as u64;
723                if len > cwnd_budget {
724                    return None; // congestion window full — wait for ACKs to free it
725                }
726                // Flow-control enforcement: consume the peer's advertised
727                // receive window. If it is exhausted, withhold the segment and
728                // wait for a `WINDOW_UPDATE` — this is what propagates a slow
729                // peer-side consumer back to us as real backpressure (the
730                // receive delivery task only credits the window on actual app
731                // consumption). `try_consume_send_window` is an atomic CAS; on
732                // success the window is debited and we WILL send (no later check
733                // can fail), so the debit never leaks.
734                if !self.try_consume_send_window(len as u32) {
735                    return None; // peer flow-control window closed — wait for WINDOW_UPDATE
736                }
737                pending.sent_at = Some(now);
738                return Some(OutboundSegment {
739                    stream_offset: pending.stream_offset,
740                    data: pending.data.clone(),
741                    reliable: true,
742                    retransmit: false,
743                });
744            }
745        }
746
747        None
748    }
749
750    /// Mark a sequence number as acknowledged.
751    /// Returns the timestamp when the packet was originally sent and its size, if found.
752    pub async fn ack(&self, stream_offset: SequenceNumber) -> Option<(tokio::time::Instant, u64)> {
753        let mut buffer = self.send_buffer.lock().await;
754        let mut result = None;
755
756        // Find the segment (by gap-free `stream_offset`, A.5) and get its sent_at.
757        if let Some(pos) = buffer.iter().position(|p| p.stream_offset == stream_offset) {
758            let sent_at = buffer[pos].sent_at;
759            let retries = buffer[pos].retries;
760            let size = buffer[pos].data.len() as u64;
761            buffer.remove(pos);
762
763            // Released space, add permit back
764            self.send_semaphore.add_permits(1);
765
766            if let Some(sent_at) = sent_at {
767                result = Some((sent_at, size));
768                // Karn's algorithm: only sample RTT from segments that were not
769                // retransmitted — an ACK for a resent sequence is ambiguous.
770                if retries == 0 {
771                    let rtt = tokio::time::Instant::now().duration_since(sent_at);
772                    self.record_rtt_sample(rtt);
773                }
774            }
775        }
776
777        result
778    }
779
780    /// Reset a still-buffered reliable segment's send timestamp so the next
781    /// [`poll_send`](Self::poll_send) re-offers it immediately (as an unsent
782    /// segment) rather than waiting a full RTO for the retransmit pass. Used
783    /// when a send attempt failed *after* `poll_send` had already stamped
784    /// `sent_at` — the bytes never reached the wire, so the segment must not be
785    /// treated as in-flight. No-op if the segment was already acknowledged and
786    /// removed.
787    pub async fn mark_unsent(&self, stream_offset: SequenceNumber) {
788        let mut buffer = self.send_buffer.lock().await;
789        if let Some(pending) = buffer.iter_mut().find(|p| p.stream_offset == stream_offset) {
790            pending.sent_at = None;
791        }
792    }
793
794    // ── SACK (selective acknowledgement) — L1-A / A.5 ──
795
796    /// Build a [`Sack`] describing exactly the reliable-data sequences this stream
797    /// currently holds, derived from the **reorder state** (single source of truth):
798    /// the contiguous delivered run `[0, recv_sequence-1]` as one range, plus one
799    /// range per out-of-order island still buffered in `recv_buffer`. Returns
800    /// `None` if nothing has been received yet.
801    ///
802    /// Because the SACK is derived from what the reorder buffer actually holds, the
803    /// receiver never SACKs a sequence it has dropped (the SACK-without-data hazard
804    /// of a separate received-set). `ack_delay_us`: the caller's measured value, or
805    /// — when `0` — a coarse `now − last_data_recv_at` so the on-wire field is
806    /// populated. The range set is capped to [`crate::transport::sack::MAX_SACK_RANGES`]
807    /// by [`Sack::from_inclusive_ranges`] so it always decodes at the peer.
808    pub async fn received_sack(&self, ack_delay_us: u32) -> Option<Sack> {
809        let next = self.recv_sequence.load(Ordering::SeqCst);
810        let buf = self.recv_buffer.lock().await;
811        if next == 0 && buf.is_empty() {
812            return None;
813        }
814        // Contiguous delivered run first (lowest), then the buffered islands
815        // (all strictly above `next`, since `next` itself is the missing hole).
816        let mut ranges: Vec<(u32, u32)> = Vec::new();
817        if next > 0 {
818            ranges.push((0, next - 1));
819        }
820        let mut islands: Vec<SequenceNumber> = buf.iter().map(|(s, _)| *s).collect();
821        drop(buf);
822        islands.sort_unstable();
823        for s in islands {
824            match ranges.last_mut() {
825                // Coalesce adjacent / duplicate into the previous ascending range.
826                Some(last) if s <= last.1.saturating_add(1) => {
827                    if s > last.1 {
828                        last.1 = s;
829                    }
830                }
831                _ => ranges.push((s, s)),
832            }
833        }
834
835        let delay = if ack_delay_us != 0 {
836            ack_delay_us
837        } else {
838            // Coarse fallback: time since the most recent data arrival.
839            let recv_at = match self.last_data_recv_at.lock() {
840                Ok(g) => *g,
841                Err(poisoned) => *poisoned.into_inner(),
842            };
843            recv_at
844                .map(|t| {
845                    let micros = tokio::time::Instant::now().duration_since(t).as_micros();
846                    u32::try_from(micros).unwrap_or(u32::MAX)
847                })
848                .unwrap_or(0)
849        };
850        Sack::from_inclusive_ranges(ranges, delay)
851    }
852
853    /// Process a received SACK, retiring **every** buffered reliable segment whose
854    /// gap-free `stream_offset` the SACK covers (A.5; the SACK ranges are over
855    /// `stream_offset`, not the control-frame-holed wire `sequence`). Returns a
856    /// [`SackResult`] listing the newly-retired segments so the caller can feed
857    /// congestion control / the RTT estimator per segment.
858    ///
859    /// RTT is sampled here (Karn's algorithm) only for segments that were never
860    /// retransmitted (`retries == 0`); `RetiredSegment::was_retransmit` marks the
861    /// rest so the caller does not double-count or use an ambiguous sample.
862    ///
863    /// This is a cumulative retire: a SACK re-acks every still-buffered offset it
864    /// covers, so a lost ACK no longer strands a segment — the next SACK retires
865    /// it. **No loss detection / fast-retransmit here** — that is L1-B.
866    pub async fn on_sack(&self, sack: &Sack) -> SackResult {
867        let mut buffer = self.send_buffer.lock().await;
868        let mut retired = Vec::new();
869        let mut freed = 0u32;
870        let now = tokio::time::Instant::now();
871
872        // Retain only the segments the SACK does NOT cover; collect the rest.
873        let mut i = 0;
874        while i < buffer.len() {
875            // SACK ranges are over the gap-free reliable `stream_offset` (A.5),
876            // NOT the wire `sequence` (which has control-frame holes).
877            // PANIC-SAFETY: `i < buffer.len()` is the loop guard, so the index is
878            // in range; `get` cannot return `None`.
879            #[allow(clippy::unwrap_used, clippy::disallowed_methods)]
880            let covered = sack.acks(buffer.get(i).unwrap().stream_offset);
881            if covered {
882                // PANIC-SAFETY: `i` is a valid index (loop guard); `remove`
883                // returns `Some` for an in-range index in a VecDeque.
884                #[allow(clippy::unwrap_used, clippy::disallowed_methods)]
885                let pending = buffer.remove(i).unwrap();
886                freed += 1;
887                let was_retransmit = pending.retries > 0;
888                let size = pending.data.len() as u64;
889                if let Some(sent_at) = pending.sent_at {
890                    // Karn: only sample RTT from segments never retransmitted.
891                    if !was_retransmit {
892                        let rtt = now.duration_since(sent_at);
893                        self.record_rtt_sample(rtt);
894                    }
895                }
896                retired.push(RetiredSegment {
897                    sent_at: pending.sent_at,
898                    size,
899                    was_retransmit,
900                });
901                // Do NOT advance `i`: `remove` shifted the next element into `i`.
902            } else {
903                i += 1;
904            }
905        }
906
907        // Loss detection (RFC 9002 §6.1.1) over the still-buffered, in-flight
908        // segments, keyed on the gap-free `stream_offset`: declare lost any offset
909        // at least `PACKET_THRESHOLD` behind `largest_acked` (packet-threshold), or
910        // — if an srtt is known — any offset below `largest_acked` aged past
911        // srtt·9/8 (RACK time-threshold). Flagged segments are fast-retransmitted
912        // by `poll_send`'s Pass-0; already-flagged ones are skipped (no double-count
913        // into congestion control).
914        // T5.4: clamp `largest_acked` to the highest `stream_offset` we have actually assigned
915        // (`reliable_offset` is the next-to-assign, so it bounds every offset on the wire). A
916        // peer cannot legitimately ack an offset we never sent; without this an authenticated
917        // peer inflating `largest_acked` (e.g. `high + 1e6`) would declare freshly-sent,
918        // in-flight segments "lost" and force a cwnd-bypassing Pass-0 retransmit storm.
919        let largest_acked = sack
920            .largest_acked
921            .min(self.reliable_offset.load(Ordering::SeqCst));
922        // RFC 9002: loss_delay = max(kGranularity, kTimeThreshold · smoothed_rtt).
923        // The kGranularity (1 ms) floor is load-bearing: without it a near-zero
924        // srtt makes the threshold ~0 and flags freshly-sent segments as "aged",
925        // which would over-report loss.
926        let time_threshold = self
927            .smoothed_rtt()
928            .map(|r| std::cmp::max(Duration::from_millis(1), r * 9 / 8));
929        let mut lost = Vec::new();
930        for pending in buffer.iter_mut() {
931            if pending.lost {
932                continue;
933            }
934            let Some(sent_at) = pending.sent_at else {
935                continue; // not yet on the wire — nothing to lose
936            };
937            if pending.stream_offset >= largest_acked {
938                continue; // not behind the largest ack — still legitimately in flight
939            }
940            let packet_lost =
941                largest_acked >= pending.stream_offset.saturating_add(PACKET_THRESHOLD);
942            let time_lost = time_threshold.is_some_and(|t| now.duration_since(sent_at) >= t);
943            if packet_lost || time_lost {
944                pending.lost = true;
945                lost.push(LostSegment {
946                    stream_offset: pending.stream_offset,
947                    size: pending.data.len() as u64,
948                });
949            }
950        }
951        drop(buffer);
952
953        // Return the buffer permits for every retired segment in one shot.
954        if freed > 0 {
955            self.send_semaphore.add_permits(freed as usize);
956        }
957
958        SackResult { retired, lost }
959    }
960
961    // ── Receive-side in-order reassembly (A.5) ──
962
963    /// Accept reliable data payloads carried at `sequence` and return the
964    /// contiguous in-order run now deliverable to the application, in ascending
965    /// order. The returned `Vec` is empty when this is a future hole (buffered for
966    /// later), a duplicate, or refused for capacity.
967    ///
968    /// `payloads` is normally one element (a single RELIABLE frame); a COALESCED
969    /// bundle passes its sub-payloads so the whole bundle occupies one cursor
970    /// position. This is the **single source of truth** for receive ordering: the
971    /// live data pump routes every reliable app payload through here so the app
972    /// sees the reliable stream strictly in `sequence` order even over a
973    /// reordering (UDP) path. Out-of-order segments are held in `recv_buffer`
974    /// (bounded by `MAX_RECV_REORDER`); the data-arrival instant is stamped for
975    /// the SACK `ack_delay_us`.
976    pub async fn accept_in_order(
977        &self,
978        sequence: SequenceNumber,
979        payloads: Vec<Bytes>,
980    ) -> Vec<Bytes> {
981        {
982            let mut at = match self.last_data_recv_at.lock() {
983                Ok(g) => g,
984                Err(poisoned) => poisoned.into_inner(),
985            };
986            *at = Some(tokio::time::Instant::now());
987        }
988
989        let expected = self.recv_sequence.load(Ordering::SeqCst);
990        if sequence < expected {
991            return Vec::new(); // duplicate of already-delivered data
992        }
993
994        let mut buf = self.recv_buffer.lock().await;
995        if sequence != expected {
996            // Future segment: buffer if not already held, within the entry cap, AND within
997            // the per-stream byte budget (H-3). A refused segment is NOT recorded, so it is
998            // not SACKed → the sender retransmits it (no SACK-without-data hazard, bounded
999            // memory regardless of per-entry frame size).
1000            let already = buf.iter().any(|(s, _)| *s == sequence);
1001            let seg_bytes: usize = payloads.iter().map(Bytes::len).sum();
1002            let within_byte_budget = self
1003                .recv_buffer_bytes
1004                .load(Ordering::Relaxed)
1005                .saturating_add(seg_bytes)
1006                <= MAX_RECV_REORDER_BYTES;
1007            if !already && buf.len() < MAX_RECV_REORDER && within_byte_budget {
1008                buf.push_back((sequence, payloads));
1009                self.recv_buffer_bytes
1010                    .fetch_add(seg_bytes, Ordering::Relaxed);
1011            }
1012            return Vec::new();
1013        }
1014
1015        // In-order: deliver this segment's payloads, then drain any now-contiguous
1016        // buffered segments.
1017        let mut out = payloads;
1018        self.recv_sequence.fetch_add(1, Ordering::SeqCst);
1019        loop {
1020            let next = self.recv_sequence.load(Ordering::SeqCst);
1021            if let Some(pos) = buf.iter().position(|(s, _)| *s == next) {
1022                // PANIC-SAFETY: `pos` was just returned by `position`, so the
1023                // index is valid; `recv_buf` is locked, so no concurrent drain.
1024                #[allow(clippy::unwrap_used, clippy::disallowed_methods)]
1025                let (_, payloads) = buf.remove(pos).unwrap();
1026                let seg_bytes: usize = payloads.iter().map(Bytes::len).sum();
1027                self.recv_buffer_bytes
1028                    .fetch_sub(seg_bytes, Ordering::Relaxed);
1029                out.extend(payloads);
1030                self.recv_sequence.fetch_add(1, Ordering::SeqCst);
1031            } else {
1032                break;
1033            }
1034        }
1035        out
1036    }
1037
1038    /// Total payload bytes currently held in the out-of-order reorder buffer (H-3). Bounded
1039    /// by `MAX_RECV_REORDER_BYTES`; exposed so the byte bound is observable/testable.
1040    pub fn recv_reorder_bytes(&self) -> usize {
1041        self.recv_buffer_bytes.load(Ordering::Relaxed)
1042    }
1043
1044    /// Pull-API adapter over [`accept_in_order`](Self::accept_in_order): buffer a
1045    /// single reliable payload for in-order reassembly and push the released run
1046    /// into `recv_ready` for [`recv`](Self::recv) / [`try_recv`](Self::try_recv).
1047    /// (Not used by the live session pump, which consumes the returned run
1048    /// directly; retained for the pull-style read API.)
1049    pub async fn on_receive(&self, sequence: SequenceNumber, data: Bytes) {
1050        let delivered = self.accept_in_order(sequence, vec![data]).await;
1051        if !delivered.is_empty() {
1052            let mut ready = self.recv_ready.lock().await;
1053            for d in delivered {
1054                ready.push_back(d);
1055            }
1056            drop(ready);
1057            self.recv_notify.notify_waiters();
1058        }
1059    }
1060
1061    /// Read data from the stream (async, waits if no data available)
1062    pub async fn recv(&self) -> Option<Bytes> {
1063        loop {
1064            {
1065                let mut ready = self.recv_ready.lock().await;
1066                if let Some(data) = ready.pop_front() {
1067                    return Some(data);
1068                }
1069
1070                // Check if stream is closed
1071                if self.remote_finished.load(Ordering::SeqCst) {
1072                    return None;
1073                }
1074            }
1075
1076            // Wait for new data
1077            self.recv_notify.notified().await;
1078        }
1079    }
1080
1081    /// Try to read data without waiting
1082    pub async fn try_recv(&self) -> Option<Bytes> {
1083        self.recv_ready.lock().await.pop_front()
1084    }
1085
1086    /// Mark local side as finished (no more data to send)
1087    pub async fn finish(&self) {
1088        self.local_finished.store(true, Ordering::SeqCst);
1089        self.update_state().await;
1090    }
1091
1092    /// Mark remote side as finished
1093    pub async fn on_remote_finish(&self) {
1094        self.remote_finished.store(true, Ordering::SeqCst);
1095        self.recv_notify.notify_waiters();
1096        self.update_state().await;
1097    }
1098
1099    /// Update stream state based on finish flags
1100    async fn update_state(&self) {
1101        let local = self.local_finished.load(Ordering::SeqCst);
1102        let remote = self.remote_finished.load(Ordering::SeqCst);
1103
1104        let new_state = match (local, remote) {
1105            (true, true) => StreamState::Closed,
1106            (true, false) => StreamState::HalfClosedLocal,
1107            (false, true) => StreamState::HalfClosedRemote,
1108            (false, false) => StreamState::Open,
1109        };
1110
1111        *self.state.lock().await = new_state;
1112    }
1113
1114    /// Get number of pending send chunks
1115    pub async fn pending_send_count(&self) -> usize {
1116        self.send_buffer.lock().await.len()
1117    }
1118
1119    /// Get number of pending receive chunks
1120    pub async fn pending_recv_count(&self) -> usize {
1121        self.recv_ready.lock().await.len()
1122    }
1123
1124    /// Check if stream is closed
1125    pub fn is_closed(&self) -> bool {
1126        self.local_finished.load(Ordering::SeqCst) && self.remote_finished.load(Ordering::SeqCst)
1127    }
1128}
1129
1130impl std::fmt::Debug for Stream {
1131    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1132        f.debug_struct("Stream")
1133            .field("id", &self.id)
1134            .field("recv_offset", &self.recv_sequence.load(Ordering::Relaxed))
1135            .field("priority", &self.priority.load(Ordering::Relaxed))
1136            .finish()
1137    }
1138}
1139
1140#[cfg(test)]
1141mod tests {
1142    use super::*;
1143
1144    #[tokio::test]
1145    async fn test_stream_send_recv() {
1146        let stream = Stream::new(1);
1147
1148        // Send data
1149        stream.send_reliable(Bytes::from("hello")).await.unwrap();
1150        stream.send_reliable(Bytes::from("world")).await.unwrap();
1151
1152        // Check pending
1153        assert_eq!(stream.pending_send_count().await, 2);
1154
1155        // Poll send twice, the second should be None because it's already sent and hasn't timed out
1156        let seg = stream.poll_send(u64::MAX).await.unwrap();
1157        assert_eq!(seg.stream_offset, 0);
1158        assert_eq!(seg.data, Bytes::from("hello"));
1159        assert!(seg.reliable);
1160        assert!(!seg.retransmit);
1161
1162        let seg2 = stream.poll_send(u64::MAX).await.unwrap();
1163        assert_eq!(seg2.stream_offset, 1);
1164        assert_eq!(seg2.data, Bytes::from("world"));
1165        assert!(seg2.reliable);
1166        assert!(!seg2.retransmit);
1167
1168        assert!(stream.poll_send(u64::MAX).await.is_none());
1169    }
1170
1171    /// T4.5 (reviewer §1, `stream_offset`): the gap-free reliable offset is a `u32`
1172    /// assigned per reliable segment. A naive `fetch_add(1)` silently wraps `u32::MAX`
1173    /// back to `0`, colliding with the first segment's offset and corrupting
1174    /// reassembly / SACK dedup (a duplicate offset, NOT a nonce reuse — the AEAD nonce
1175    /// is the `u64` packet number). It must fail-closed instead — mirroring the epoch
1176    /// saturation guard in `Session::rekey` — so an exhausted stream refuses new
1177    /// reliable data rather than corrupting the stream.
1178    #[tokio::test]
1179    async fn reliable_offset_fails_closed_at_u32_exhaustion() {
1180        let stream = Stream::new(1);
1181
1182        // The last assignable offset is `u32::MAX - 1`; assigning it leaves the cursor
1183        // at `u32::MAX`, the exhaustion sentinel.
1184        stream.reliable_offset.store(u32::MAX - 1, Ordering::SeqCst);
1185        let last = stream
1186            .send_reliable(Bytes::from_static(b"a"))
1187            .await
1188            .expect("offset u32::MAX-1 must still be assignable");
1189        assert_eq!(last, u32::MAX - 1, "last assignable reliable offset");
1190
1191        // The next send must fail-closed — never wrap to 0.
1192        let exhausted = stream.send_reliable(Bytes::from_static(b"b")).await;
1193        assert!(
1194            matches!(exhausted, Err(crate::errors::CoreError::StreamError(_))),
1195            "send_reliable must fail-closed (StreamError) at u32 offset exhaustion, got {exhausted:?}"
1196        );
1197
1198        // And directly at the sentinel.
1199        stream.reliable_offset.store(u32::MAX, Ordering::SeqCst);
1200        let at_sentinel = stream.send_reliable(Bytes::from_static(b"c")).await;
1201        assert!(
1202            at_sentinel.is_err(),
1203            "send_reliable at the u32::MAX sentinel must fail-closed, got {at_sentinel:?}"
1204        );
1205    }
1206
1207    #[tokio::test]
1208    async fn test_stream_retransmission() {
1209        // We use tokio::time::pause to mock time and test timeout
1210        tokio::time::pause();
1211        let stream = Stream::new(1);
1212
1213        stream.send_reliable(Bytes::from("hello")).await.unwrap();
1214
1215        // First send — not a retransmission.
1216        let seg = stream.poll_send(u64::MAX).await.unwrap();
1217        assert_eq!(seg.stream_offset, 0);
1218        assert!(seg.reliable);
1219        assert!(!seg.retransmit);
1220
1221        // Immediate poll should be None
1222        assert!(stream.poll_send(u64::MAX).await.is_none());
1223
1224        // Advance 400ms — still under the initial 1s RTO (RFC 6298 (2.1):
1225        // no RTT samples yet, so the timer sits at the 1-second default).
1226        tokio::time::advance(std::time::Duration::from_millis(400)).await;
1227        assert!(stream.poll_send(u64::MAX).await.is_none());
1228
1229        // Advance past the 1s initial RTO (total ~1.1s).
1230        tokio::time::advance(std::time::Duration::from_millis(700)).await;
1231
1232        // Now it should retransmit — flagged as a retransmission.
1233        let seg2 = stream.poll_send(u64::MAX).await.unwrap();
1234        assert_eq!(seg2.stream_offset, 0);
1235        assert_eq!(seg2.data, Bytes::from("hello"));
1236        assert!(seg2.reliable);
1237        assert!(seg2.retransmit);
1238
1239        // Ack it
1240        let acked = stream.ack(0).await;
1241        assert!(acked.is_some());
1242
1243        // Poll again - queue is empty
1244        assert!(stream.poll_send(u64::MAX).await.is_none());
1245    }
1246
1247    #[tokio::test]
1248    async fn mark_unsent_re_offers_without_waiting_rto() {
1249        // Time is paused, so nothing ever crosses the RTO — any re-offer here is
1250        // due to `mark_unsent`, not the retransmit timer.
1251        tokio::time::pause();
1252        let stream = Stream::new(1);
1253        stream.send_reliable(Bytes::from("hello")).await.unwrap();
1254
1255        // First poll stamps `sent_at`; an immediate re-poll yields nothing
1256        // (treated as in-flight, not yet timed out).
1257        let seg = stream.poll_send(u64::MAX).await.unwrap();
1258        assert_eq!(seg.stream_offset, 0);
1259        assert!(!seg.retransmit);
1260        assert!(stream.poll_send(u64::MAX).await.is_none());
1261
1262        // Simulate a send that failed *after* `poll_send` stamped the segment:
1263        // clear `sent_at` so it is no longer considered in-flight.
1264        stream.mark_unsent(0).await;
1265
1266        // It is re-offered immediately — without advancing past the RTO — and as
1267        // a fresh send (Pass 2), not a retransmission.
1268        let seg2 = stream.poll_send(u64::MAX).await.unwrap();
1269        assert_eq!(seg2.stream_offset, 0);
1270        assert_eq!(seg2.data, Bytes::from("hello"));
1271        assert!(seg2.reliable);
1272        assert!(!seg2.retransmit);
1273
1274        // `mark_unsent` on an already-acked (removed) segment is a no-op.
1275        assert!(stream.ack(0).await.is_some());
1276        stream.mark_unsent(0).await; // no panic, no effect
1277        assert!(stream.poll_send(u64::MAX).await.is_none());
1278    }
1279
1280    #[tokio::test]
1281    async fn poll_send_respects_the_cwnd_budget() {
1282        let stream = Stream::new(1);
1283        stream
1284            .send_reliable(Bytes::from("0123456789"))
1285            .await
1286            .unwrap(); // 10 bytes
1287        stream.send_reliable(Bytes::from("abcde")).await.unwrap(); // 5 bytes
1288
1289        // Budget of 10 admits the 10-byte head segment.
1290        let seg = stream.poll_send(10).await.unwrap();
1291        assert_eq!(seg.data.len(), 10);
1292        assert!(!seg.retransmit);
1293
1294        // Budget of 4 is too small for the next (5-byte) segment → withheld.
1295        assert!(stream.poll_send(4).await.is_none());
1296
1297        // A budget of 5 now admits it.
1298        let seg2 = stream.poll_send(5).await.unwrap();
1299        assert_eq!(seg2.data, Bytes::from("abcde"));
1300    }
1301
1302    #[tokio::test]
1303    async fn test_stream_in_order_receive() {
1304        let stream = Stream::new(1);
1305
1306        // Receive in order
1307        stream.on_receive(0, Bytes::from("first")).await;
1308        stream.on_receive(1, Bytes::from("second")).await;
1309
1310        assert_eq!(stream.try_recv().await, Some(Bytes::from("first")));
1311        assert_eq!(stream.try_recv().await, Some(Bytes::from("second")));
1312        assert_eq!(stream.try_recv().await, None);
1313    }
1314
1315    #[tokio::test]
1316    async fn test_stream_out_of_order_receive() {
1317        let stream = Stream::new(1);
1318
1319        // Receive out of order
1320        stream.on_receive(1, Bytes::from("second")).await;
1321        stream.on_receive(0, Bytes::from("first")).await;
1322
1323        // Should be reordered
1324        assert_eq!(stream.try_recv().await, Some(Bytes::from("first")));
1325        assert_eq!(stream.try_recv().await, Some(Bytes::from("second")));
1326    }
1327
1328    #[tokio::test]
1329    async fn test_stream_state() {
1330        let stream = Stream::new(1);
1331
1332        assert_eq!(stream.state().await, StreamState::Open);
1333
1334        stream.finish().await;
1335        assert_eq!(stream.state().await, StreamState::HalfClosedLocal);
1336
1337        stream.on_remote_finish().await;
1338        assert_eq!(stream.state().await, StreamState::Closed);
1339        assert!(stream.is_closed());
1340    }
1341
1342    #[tokio::test]
1343    async fn test_stream_backpressure() {
1344        let stream = Stream::new(1);
1345
1346        // Fill the buffer
1347        for _ in 0..MAX_PENDING_PACKETS {
1348            stream.send_reliable(Bytes::from("data")).await.unwrap();
1349        }
1350
1351        assert_eq!(stream.pending_send_count().await, MAX_PENDING_PACKETS);
1352
1353        // Try to send one more with timeout
1354        let send_future = stream.send_reliable(Bytes::from("blocked"));
1355        let result = tokio::time::timeout(std::time::Duration::from_millis(100), send_future).await;
1356        assert!(result.is_err(), "Send should have blocked");
1357
1358        // Ack one
1359        stream.ack(0).await;
1360
1361        // Now it should succeed
1362        let send_future = stream.send_reliable(Bytes::from("resumed"));
1363        let result = tokio::time::timeout(std::time::Duration::from_millis(100), send_future).await;
1364        assert!(result.is_ok(), "Send should have succeeded after ack");
1365        assert_eq!(stream.pending_send_count().await, MAX_PENDING_PACKETS);
1366    }
1367
1368    // ── SACK (selective acknowledgement) — L1-A ──
1369
1370    /// Stage segments 0..=5 on the send buffer, feed a SACK that covers
1371    /// {0,1,2,4,5} (gap at 3), and assert it retires exactly those five segments,
1372    /// leaving only segment 3 buffered. This is the headline L1-A behaviour: a
1373    /// single SACK retires multiple segments at once, skipping the gap.
1374    #[tokio::test]
1375    async fn on_sack_retires_all_covered_segments_skipping_the_gap() {
1376        let stream = Stream::new(1);
1377        for i in 0..6u32 {
1378            let seq = stream
1379                .send_reliable(Bytes::from(format!("seg-{i}")))
1380                .await
1381                .unwrap();
1382            assert_eq!(seq, i);
1383            // Stamp it as in-flight so RTT sampling has a `sent_at`.
1384            let seg = stream.poll_send(u64::MAX).await.expect("poll");
1385            assert_eq!(seg.stream_offset, i);
1386        }
1387        assert_eq!(stream.pending_send_count().await, 6);
1388
1389        // SACK covers {0,1,2,4,5} — segment 3 is the gap.
1390        let sack = Sack::from_received(&[0, 1, 2, 4, 5], 1234).expect("sack");
1391        assert_eq!(sack.ranges(), &[(4, 5), (0, 2)]);
1392        let result = stream.on_sack(&sack).await;
1393
1394        // Five segments retired, none of them retransmissions.
1395        assert_eq!(result.retired.len(), 5);
1396        assert!(result.retired.iter().all(|r| !r.was_retransmit));
1397        assert!(result.retired.iter().all(|r| r.sent_at.is_some()));
1398
1399        // Only segment 3 remains buffered.
1400        assert_eq!(stream.pending_send_count().await, 1);
1401        // Re-acking the retired sequences finds nothing (already removed); seq 3
1402        // is still ackable.
1403        for retired_seq in [0u32, 1, 2, 4, 5] {
1404            assert!(
1405                stream.ack(retired_seq).await.is_none(),
1406                "seq {retired_seq} should already be retired by the SACK"
1407            );
1408        }
1409        assert!(
1410            stream.ack(3).await.is_some(),
1411            "the gap segment 3 must remain buffered"
1412        );
1413    }
1414
1415    /// T5.4 (audit SACK-storm LOW): a SACK's `largest_acked` is clamped to the highest
1416    /// stream_offset actually sent, so an authenticated peer can't inflate it (e.g.
1417    /// `high + 1e6`) to declare freshly-sent, legitimately-in-flight segments "lost" and force
1418    /// a cwnd-bypassing Pass-0 retransmit storm.
1419    #[tokio::test]
1420    async fn on_sack_clamps_inflated_largest_acked() {
1421        let stream = Stream::new(1);
1422        for i in 0..5u32 {
1423            let seq = stream
1424                .send_reliable(Bytes::from(format!("seg-{i}")))
1425                .await
1426                .unwrap();
1427            assert_eq!(seq, i);
1428            let seg = stream.poll_send(u64::MAX).await.expect("poll"); // stamps sent_at
1429            assert_eq!(seg.stream_offset, i);
1430        }
1431        // A SACK that acks NONE of our segments (0..5) but claims a `largest_acked` far beyond
1432        // anything we ever sent.
1433        let sack = Sack::from_received(&[1_000_000], 0).expect("sack");
1434        assert_eq!(sack.largest_acked, 1_000_000);
1435        let result = stream.on_sack(&sack).await;
1436        // The freshest in-flight segment (within PACKET_THRESHOLD of the highest sent) must NOT
1437        // be flagged lost — the clamp limits loss detection to the real sent range.
1438        assert!(
1439            !result.lost.iter().any(|l| l.stream_offset == 4),
1440            "an inflated largest_acked must not flag the freshest in-flight segment as lost"
1441        );
1442    }
1443
1444    /// A SACK that covers nothing still buffered (stale / duplicate) retires
1445    /// nothing and leaves the send buffer intact.
1446    #[tokio::test]
1447    async fn on_sack_for_unbuffered_sequences_retires_nothing() {
1448        let stream = Stream::new(1);
1449        stream.send_reliable(Bytes::from("zero")).await.unwrap(); // seq 0
1450        let _ = stream.poll_send(u64::MAX).await.expect("poll");
1451
1452        // SACK only covers high sequences we never sent.
1453        let sack = Sack::from_received(&[100, 101, 102], 0).expect("sack");
1454        let result = stream.on_sack(&sack).await;
1455        assert!(result.retired.is_empty());
1456        assert_eq!(stream.pending_send_count().await, 1);
1457    }
1458
1459    /// A retransmitted segment retired by a SACK is flagged `was_retransmit`, so
1460    /// the caller does not sample RTT from it (Karn's algorithm).
1461    #[tokio::test]
1462    async fn on_sack_flags_retransmits_for_karn() {
1463        tokio::time::pause();
1464        let stream = Stream::new(1);
1465        stream.send_reliable(Bytes::from("payload")).await.unwrap(); // seq 0
1466        let _ = stream.poll_send(u64::MAX).await.expect("first send");
1467
1468        // Force a retransmit by crossing the RTO, so retries > 0.
1469        tokio::time::advance(Duration::from_millis(1100)).await;
1470        let retx = stream.poll_send(u64::MAX).await.expect("retransmit");
1471        assert!(retx.retransmit);
1472
1473        let sack = Sack::from_received(&[0], 0).expect("sack");
1474        let result = stream.on_sack(&sack).await;
1475        assert_eq!(result.retired.len(), 1);
1476        assert!(
1477            result.retired[0].was_retransmit,
1478            "a retransmitted segment must be flagged so the caller skips RTT sampling"
1479        );
1480    }
1481
1482    // ── L1-B: loss detection (RFC 9002) + fast-retransmit ──
1483
1484    /// **L1-B packet-threshold loss + Pass-0 fast-retransmit.** Stage offsets
1485    /// 0..=5 in flight; a SACK acking only {4,5} declares every still-buffered
1486    /// offset ≤ largest_acked − PACKET_THRESHOLD(3) = 2 lost (0,1,2), leaving 3
1487    /// unflagged. `poll_send`'s Pass-0 then fast-retransmits a flagged-lost segment
1488    /// even with a CLOSED congestion window (cwnd_budget = 0), ahead of new data.
1489    #[tokio::test]
1490    async fn on_sack_packet_threshold_marks_lost_and_pass0_fast_retransmits() {
1491        // Pause time so no segment ages past the 1 ms time-threshold floor — this
1492        // isolates the PACKET-threshold (the time-threshold has its own test).
1493        tokio::time::pause();
1494        let stream = Stream::new(1);
1495        for _ in 0..6u32 {
1496            stream
1497                .send_reliable(Bytes::from_static(b"x"))
1498                .await
1499                .unwrap();
1500            let _ = stream.poll_send(u64::MAX).await.expect("in-flight");
1501        }
1502        // SACK acks offsets {4,5}: 0,1,2 are ≤ 5−3 → lost; 3 is within threshold.
1503        let sack = Sack::from_received(&[4, 5], 0).expect("sack");
1504        let result = stream.on_sack(&sack).await;
1505        assert_eq!(
1506            result.lost_offsets(),
1507            vec![0, 1, 2],
1508            "packet-threshold must flag every offset ≤ largest_acked − 3"
1509        );
1510        // Pass-0 re-sends a flagged segment even with a closed congestion window.
1511        let seg = stream
1512            .poll_send(0)
1513            .await
1514            .expect("Pass-0 fast-retransmit must ignore the congestion window");
1515        assert!(seg.retransmit, "Pass-0 segment is a retransmit");
1516        assert!(
1517            [0u32, 1, 2].contains(&seg.stream_offset),
1518            "a flagged-lost offset is fast-retransmitted (got {})",
1519            seg.stream_offset
1520        );
1521    }
1522
1523    /// **L1-B time-threshold (RACK) loss.** With an established srtt, a
1524    /// still-buffered segment older than srtt·9/8 is declared lost once a LATER
1525    /// segment is acked, even when the packet threshold cannot fire (fewer than 3
1526    /// newer offsets acked). Offsets 0 and 1 are in flight; a SACK acks only {1}
1527    /// (largest_acked = 1, so 0 is within the packet threshold) but 0 has aged past
1528    /// srtt·9/8 → lost by time-threshold.
1529    #[tokio::test]
1530    async fn on_sack_time_threshold_marks_aged_segment_lost() {
1531        tokio::time::pause();
1532        let stream = Stream::new(1);
1533        // Establish a small srtt: send offset 0, ack it after ~10 ms.
1534        stream
1535            .send_reliable(Bytes::from_static(b"a"))
1536            .await
1537            .unwrap(); // offset 0
1538        let _ = stream.poll_send(u64::MAX).await.expect("send 0");
1539        tokio::time::advance(Duration::from_millis(10)).await;
1540        let _ = stream
1541            .on_sack(&Sack::from_received(&[0], 0).expect("sack"))
1542            .await; // srtt ≈ 10 ms
1543
1544        // Send offsets 1 and 2; age them well past srtt·9/8 (≈ 11 ms).
1545        stream
1546            .send_reliable(Bytes::from_static(b"b"))
1547            .await
1548            .unwrap(); // offset 1
1549        stream
1550            .send_reliable(Bytes::from_static(b"c"))
1551            .await
1552            .unwrap(); // offset 2
1553        let _ = stream.poll_send(u64::MAX).await.expect("send 1");
1554        let _ = stream.poll_send(u64::MAX).await.expect("send 2");
1555        tokio::time::advance(Duration::from_millis(50)).await;
1556
1557        // SACK acks only {2} (largest_acked = 2). Offset 1 is within the packet
1558        // threshold (2 − 1 < 3) but aged past srtt·9/8 → lost by time-threshold.
1559        let result = stream
1560            .on_sack(&Sack::from_received(&[2], 0).expect("sack"))
1561            .await;
1562        assert_eq!(
1563            result.lost_offsets(),
1564            vec![1],
1565            "an aged unacked segment must be flagged by the time-threshold"
1566        );
1567    }
1568
1569    /// `received_sack` derives ranges from the reorder state with a gap, and
1570    /// `ack_delay_us` is populated (non-zero) when the receiver holds before
1571    /// emitting (here, the coarse `now − recv_at` fallback under paused time).
1572    #[tokio::test]
1573    async fn received_sack_builds_ranges_with_gap_and_populates_ack_delay() {
1574        tokio::time::pause();
1575        let stream = Stream::new(1);
1576        // Receiver got 0,1,2,4,5 (gap at 3): 0,1,2 deliver in order (recv_sequence
1577        // → 3), 4 and 5 stay buffered as an island.
1578        for seq in [0u32, 1, 2, 4, 5] {
1579            let _ = stream
1580                .accept_in_order(seq, vec![Bytes::from_static(b"x")])
1581                .await;
1582        }
1583        // Hold briefly so `now − recv_at` is non-zero.
1584        tokio::time::advance(Duration::from_micros(500)).await;
1585
1586        let sack = stream
1587            .received_sack(0)
1588            .await
1589            .expect("non-empty received set");
1590        assert_eq!(sack.largest_acked, 5);
1591        // Contiguous run (0,2) plus the buffered island (4,5), descending.
1592        assert_eq!(sack.ranges(), &[(4, 5), (0, 2)]);
1593        assert!(
1594            sack.ack_delay_us >= 500,
1595            "ack_delay_us must be populated from the recv-to-emit hold (got {})",
1596            sack.ack_delay_us
1597        );
1598
1599        // An explicit (non-zero) ack_delay passes through verbatim.
1600        let sack2 = stream.received_sack(42).await.expect("non-empty");
1601        assert_eq!(sack2.ack_delay_us, 42);
1602    }
1603
1604    /// Nothing received yet yields no SACK.
1605    #[tokio::test]
1606    async fn received_sack_empty_returns_none() {
1607        let stream = Stream::new(1);
1608        assert!(stream.received_sack(0).await.is_none());
1609    }
1610
1611    /// `accept_in_order` delivers the contiguous run and buffers holes: feeding
1612    /// 0, then 2, then 1 yields `[0]`, `[]` (2 buffered), `[1, 2]` (1 fills the
1613    /// gap and drains the buffered 2) — strict in-order delivery.
1614    #[tokio::test]
1615    async fn accept_in_order_delivers_contiguous_run_and_buffers_holes() {
1616        let stream = Stream::new(1);
1617        let d0 = stream
1618            .accept_in_order(0, vec![Bytes::from_static(b"0")])
1619            .await;
1620        assert_eq!(d0, vec![Bytes::from_static(b"0")]);
1621        let d2 = stream
1622            .accept_in_order(2, vec![Bytes::from_static(b"2")])
1623            .await;
1624        assert!(
1625            d2.is_empty(),
1626            "seq 2 is a future hole — buffered, not delivered"
1627        );
1628        let d1 = stream
1629            .accept_in_order(1, vec![Bytes::from_static(b"1")])
1630            .await;
1631        assert_eq!(
1632            d1,
1633            vec![Bytes::from_static(b"1"), Bytes::from_static(b"2")],
1634            "filling the gap at 1 must release 1 then the buffered 2, in order"
1635        );
1636    }
1637
1638    /// `accept_in_order` drops duplicates of already-delivered sequences.
1639    #[tokio::test]
1640    async fn accept_in_order_drops_duplicates() {
1641        let stream = Stream::new(1);
1642        let _ = stream
1643            .accept_in_order(0, vec![Bytes::from_static(b"0")])
1644            .await;
1645        let _ = stream
1646            .accept_in_order(1, vec![Bytes::from_static(b"1")])
1647            .await;
1648        let dup = stream
1649            .accept_in_order(0, vec![Bytes::from_static(b"0")])
1650            .await;
1651        assert!(
1652            dup.is_empty(),
1653            "a duplicate of delivered data must release nothing"
1654        );
1655    }
1656
1657    /// A COALESCED bundle's multiple sub-payloads occupy ONE cursor position and
1658    /// are delivered together, in order, ahead of the next sequence.
1659    #[tokio::test]
1660    async fn accept_in_order_delivers_coalesced_bundle_as_one_cursor_position() {
1661        let stream = Stream::new(1);
1662        let bundle = vec![
1663            Bytes::from_static(b"A"),
1664            Bytes::from_static(b"B"),
1665            Bytes::from_static(b"C"),
1666        ];
1667        let d0 = stream.accept_in_order(0, bundle).await;
1668        assert_eq!(
1669            d0,
1670            vec![
1671                Bytes::from_static(b"A"),
1672                Bytes::from_static(b"B"),
1673                Bytes::from_static(b"C")
1674            ]
1675        );
1676        // The bundle consumed exactly one sequence; the next reliable frame is 1.
1677        let d1 = stream
1678            .accept_in_order(1, vec![Bytes::from_static(b"D")])
1679            .await;
1680        assert_eq!(d1, vec![Bytes::from_static(b"D")]);
1681    }
1682
1683    // ── Flow control (Phase 4.3) ──
1684
1685    #[test]
1686    fn peer_send_window_starts_at_initial() {
1687        let s = Stream::new(1);
1688        assert_eq!(s.peer_send_window(), INITIAL_STREAM_WINDOW);
1689    }
1690
1691    #[test]
1692    fn try_consume_send_window_decrements_atomically() {
1693        let s = Stream::new(1);
1694        assert!(s.try_consume_send_window(1000));
1695        assert_eq!(s.peer_send_window(), INITIAL_STREAM_WINDOW - 1000);
1696        assert!(s.try_consume_send_window(INITIAL_STREAM_WINDOW - 1000));
1697        assert_eq!(s.peer_send_window(), 0);
1698        // Further consumption fails until refilled.
1699        assert!(!s.try_consume_send_window(1));
1700    }
1701
1702    #[test]
1703    fn apply_peer_window_update_adds_relative_credit() {
1704        let s = Stream::new(1);
1705        // Drain to 100 bytes.
1706        assert!(s.try_consume_send_window(INITIAL_STREAM_WINDOW - 100));
1707        assert_eq!(s.peer_send_window(), 100);
1708
1709        // A WINDOW_UPDATE is a relative credit: it ADDS to the window.
1710        s.apply_peer_window_update(1000);
1711        assert_eq!(s.peer_send_window(), 1100);
1712        s.apply_peer_window_update(50);
1713        assert_eq!(s.peer_send_window(), 1150);
1714
1715        // Saturates at the hard cap (misbehaving-peer guard).
1716        s.apply_peer_window_update(u32::MAX);
1717        assert_eq!(s.peer_send_window(), MAX_SEND_WINDOW);
1718    }
1719
1720    #[test]
1721    fn record_app_consumed_grants_relative_credit_after_threshold() {
1722        let s = Stream::new(1);
1723        let threshold = INITIAL_STREAM_WINDOW / 2;
1724
1725        // Small drains return None.
1726        assert!(s.record_app_consumed(100).is_none());
1727        assert!(s.record_app_consumed(200).is_none());
1728
1729        // Drain across the half-window threshold → emit a credit equal to the
1730        // accumulated consumption (300 + threshold), NOT an absolute window.
1731        let credit = s.record_app_consumed(threshold);
1732        assert_eq!(
1733            credit,
1734            Some(300 + threshold),
1735            "WINDOW_UPDATE carries the relative credit (bytes consumed since last update)"
1736        );
1737
1738        // Counter resets after emitting — small further drains do not re-emit.
1739        assert!(s.record_app_consumed(10).is_none());
1740    }
1741
1742    #[test]
1743    fn relative_credit_round_trip_bounds_outstanding_to_one_window() {
1744        // Model: receiver grants credit == consumed; sender's window =
1745        // initial + Σcredit − Σsent, so outstanding (sent − consumed) ≤ initial.
1746        let sender = Stream::new(1);
1747        let receiver = Stream::new(1);
1748        let threshold = INITIAL_STREAM_WINDOW / 2;
1749
1750        // Sender fills the initial window exactly.
1751        assert!(sender.try_consume_send_window(INITIAL_STREAM_WINDOW));
1752        assert_eq!(sender.peer_send_window(), 0, "initial window exhausted");
1753
1754        // Receiver consumes one threshold's worth → grants that much credit.
1755        let credit = receiver
1756            .record_app_consumed(threshold)
1757            .expect("threshold crossed");
1758        sender.apply_peer_window_update(credit);
1759        assert_eq!(
1760            sender.peer_send_window(),
1761            threshold,
1762            "sender may now send exactly the bytes the receiver consumed"
1763        );
1764    }
1765
1766    #[test]
1767    fn staged_window_update_credit_accumulates_until_taken() {
1768        let s = Stream::new(1);
1769        assert_eq!(s.take_pending_window_update(), None);
1770
1771        // Two grants staged before a single flush must SUM, not overwrite: the
1772        // send loop (sole emitter) may run arbitrarily late after a credit is
1773        // staged, so back-to-back grants would otherwise lose all but the last
1774        // — a permanent credit leak that shrinks the peer's window over time.
1775        s.stage_window_update_credit(1000);
1776        s.stage_window_update_credit(2500);
1777        assert_eq!(s.take_pending_window_update(), Some(3500));
1778
1779        // The slot resets to empty once taken.
1780        assert_eq!(s.take_pending_window_update(), None);
1781
1782        // Accumulation saturates instead of wrapping past u32::MAX.
1783        s.stage_window_update_credit(u32::MAX);
1784        s.stage_window_update_credit(10);
1785        assert_eq!(s.take_pending_window_update(), Some(u32::MAX));
1786
1787        // Zero credit is a no-op (no spurious WINDOW_UPDATE).
1788        s.stage_window_update_credit(0);
1789        assert_eq!(s.take_pending_window_update(), None);
1790    }
1791}
phantom_protocol/transport/stream.rs

phantom_protocol/transport/
stream.rs