sozu_lib/protocol/mux/
h2.rs

1//! H2 mux connection wrapper (RFC 9113).
2//!
3//! Owns wire-side connection state: HPACK encoder/decoder, peer settings,
4//! flow window, GOAWAY/RST attribution, and the [`H2FloodDetector`] backing
5//! the CVE-2023-44487 / CVE-2024-27316 / CVE-2025-8671 mitigations. Stream
6//! storage lives in the sibling `Context<L>` (`mux/mod.rs`); this module is
7//! the canonical home for the edge-trigger discipline — paths that queue
8//! bytes for a later event-loop pass must arm writable / signal pending
9//! write (cf. `arm_writable()` at the deferred-control-frame sites and
10//! `lib/src/lib.rs:1006`-`1010`).
11
12use std::{
13    cmp::min,
14    collections::{HashMap, HashSet},
15    io::{IoSlice, Write as _},
16    time::{Duration, Instant},
17};
18
/// Build-time guard for the `payload_len as usize` casts in the H2 parser:
/// they are only lossless when `usize` is at least 32 bits wide. Targets with
/// narrower pointers (e.g. 16-bit embedded) fail to compile here instead of
/// truncating silently at runtime.
const _: () = assert!(
    usize::BITS >= 32,
    "sozu requires at least 32-bit pointers"
);
26
27use rusty_ulid::Ulid;
28use sozu_command::{logging::ansi_palette, ready::Ready};
29
30use crate::metrics::names;
31use crate::{
32    L7ListenerHandler, ListenerHandler, Protocol, Readiness, SessionMetrics,
33    protocol::mux::{
34        BackendStatus, Context, DebugEvent, DebugHistory, Endpoint, GenericHttpStream,
35        GlobalStreamId, MuxResult, Position, Stream, StreamId, StreamState, converter,
36        forcefully_terminate_answer,
37        parser::{self, Frame, FrameHeader, FrameType, H2Error, Headers, WindowUpdate},
38        pkawa, remove_backend_stream, serializer, set_default_answer,
39        shared::{EndStreamAction, drain_tls_close_notify, end_stream_decision},
40        update_readiness_after_read, update_readiness_after_write,
41    },
42    socket::{SocketHandler, SocketResult, stats::socket_rtt},
43    timer::TimeoutContainer,
44};
45
/// Prefix placed in front of every [`ConnectionH2`] log line: the protocol
/// label followed by a one-line session descriptor. The layout follows the
/// RUSTLS log-context convention, `MUX-H2\tSession(...)\t >>>`. With colored
/// output enabled (see [`ansi_palette`]) the label is wrapped in bold
/// bright-white ANSI — the same for every protocol — and the session detail
/// is rendered in light grey.
///
/// The session block carries the fields most often needed when debugging H2
/// (flow stall, leaked stream, draining state, peer-side gap, reset-flood
/// exposure):
/// - `peer` — peer address, or `None` when the socket is gone
/// - `position` — `Server` / `Client(...)` orientation
/// - `state` — current [`H2State`]
/// - `streams` — number of in-flight streams on this connection
/// - `last_peer_id` — `highest_peer_stream_id` (gap to the peer's view)
/// - `window` — connection-level send window (RFC 9113 §6.9)
/// - `draining` — set after the first GOAWAY of a graceful shutdown
/// - `total_rst_streams_emitted_lifetime` — MadeYouReset counter (CVE-2025-8671)
/// - `total_rst_received_lifetime` — Rapid Reset counter (CVE-2023-44487)
/// - `readiness` — connection-level mio readiness snapshot
///
/// Each expansion performs one `ansi_palette()` lookup and one `format!`
/// allocation. It is meant to be passed as an argument to the logging
/// macros; per the original design note it is only materialised when the
/// log level is enabled.
macro_rules! log_context {
    ($self:expr) => {{
        let (label_on, style_off, section, key, value) = ansi_palette();
        format!(
            "[{ulid} - - -]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            // Session snapshot. The three `-` in the bracket are the
            // request / cluster / backend slots, unfilled at session scope.
            ulid = $self.session_ulid,
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
            // Palette escapes returned by `ansi_palette()`.
            open = label_on,
            reset = style_off,
            grey = section,
            gray = key,
            white = value,
        )
    }};
}
93
/// Stream-scoped sibling of [`log_context!`], for call sites that have a
/// [`Stream`]'s [`HttpContext`](crate::protocol::kawa_h1::editor::HttpContext)
/// at hand. It fills the `request_id`, `cluster_id` and `backend_id` slots of
/// the leading bracket so a log line can be filtered down to the H2 stream it
/// belongs to; a missing cluster or backend id is printed as `-`.
// NOTE(review): the lint is silenced presumably because not every build uses
// this variant — confirm before removing the attribute.
#[allow(unused_macros)]
macro_rules! log_context_stream {
    ($self:expr, $http_context:expr) => {{
        let (label_on, style_off, section, key, value) = ansi_palette();
        format!(
            "[{ulid} {req} {cluster} {backend}]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            // Bracket: session id plus the per-stream identifiers.
            ulid = $self.session_ulid,
            req = $http_context.id,
            cluster = $http_context.cluster_id.as_deref().unwrap_or("-"),
            backend = $http_context.backend_id.as_deref().unwrap_or("-"),
            // Session snapshot, identical to `log_context!`.
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
            // Palette escapes returned by `ansi_palette()`.
            open = label_on,
            reset = style_off,
            grey = section,
            gray = key,
            white = value,
        )
    }};
}
127
/// Session-less log prefix for places where no `ConnectionH2` is in scope:
/// free functions, `H2ConnectionConfig` validation, and similar sites. It
/// emits the same `MUX-H2` label as the connection-level prefixes and honours
/// the colored-output flag through [`ansi_palette`].
macro_rules! log_module_context {
    () => {{
        // Only the label open/reset escapes are needed; the rest of the
        // palette is ignored.
        let (label_on, style_off, ..) = ansi_palette();
        format!("{open}MUX-H2{reset}\t >>>", open = label_on, reset = style_off)
    }};
}
138
/// Single-statement wrapper around the recurring
/// `if let Some(violation) = self.flood_detector.check_flood() { return self.handle_flood_violation(violation); }`
/// pattern.
///
/// The macro is pure dispatch: thresholds and counters stay inside
/// `H2FloodDetector::check_flood` and `ConnectionH2::handle_flood_violation`,
/// neither of which it modifies. Invoke it at every per-frame counter bump
/// site so all call sites look alike and a grep for a flood check that
/// "forgot to return" comes back empty.
///
/// Note that on a violation the expansion `return`s from the *enclosing*
/// function with the value produced by `handle_flood_violation`.
macro_rules! check_flood_or_return {
    ($self:expr) => {
        if let Some(tripped) = $self.flood_detector.check_flood() {
            return $self.handle_flood_violation(tripped);
        }
    };
}
152
/// Result of flushing one stream's queued bytes inside `write_streams`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlushOutcome {
    /// The queue was emptied: every pending byte reached the socket.
    Drained,
    /// The socket blocked while bytes were still queued. The caller is
    /// responsible for resuming later (set expect_write or return from
    /// write_streams).
    Stalled,
}
162
163// ── RFC 9113 §6.5.2 Settings Defaults ───────────────────────────────────────
164
165const DEFAULT_HEADER_TABLE_SIZE: u32 = 4096;
166const DEFAULT_MAX_CONCURRENT_STREAMS: u32 = 100;
167pub(super) const DEFAULT_INITIAL_WINDOW_SIZE: u32 = (1 << 16) - 1; // 65535
168const DEFAULT_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
169
170// RFC 9113 §6.5.2: SETTINGS_MAX_FRAME_SIZE valid range [2^14, 2^24)
171const MIN_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
172const MAX_MAX_FRAME_SIZE: u32 = 1 << 24; // 16777216 (exclusive upper bound)
173
174// RFC 9113 §6.9: maximum flow control window size (2^31 - 1)
175const FLOW_CONTROL_MAX_WINDOW: u32 = (1 << 31) - 1;
176// RFC 9113 §5.1.1: stream identifiers are 31-bit unsigned integers (2^31 - 1).
177const STREAM_ID_MAX: u32 = 0x7FFF_FFFF;
178
179/// Allocate the next locally-initiated stream identifier given the current
180/// `last_stream_id` watermark, returning `(issued_id, next_last_stream_id)`
181/// or `None` when the 31-bit space is exhausted.
182///
183/// RFC 9113 §5.1.1 reserves odd identifiers for clients and even identifiers
184/// for servers. Sōzu never server-pushes, so in practice this helper is
185/// called on the backend (client) side via [`ConnectionH2::new_stream_id`].
186/// The server branch is kept symmetrical so the behaviour is exercised by
187/// the unit tests and remains correct if push is ever enabled.
188///
189/// `last_stream_id` tracks the even "watermark" (2, 4, 6, ...). A client call
190/// issues `watermark - 1` (odd), a server call issues `watermark - 2` (even).
191/// The helper enforces two invariants:
192/// - the issued identifier never exceeds `STREAM_ID_MAX` (2³¹ - 1); and
193/// - the returned watermark is a valid starting point for the next call.
194///
195/// Exhaustion is reported with `None` to the caller, which must emit
196/// GOAWAY(NO_ERROR) and stop issuing new streams on this connection
197/// (see `start_stream` for the client-side drain path).
198pub(super) fn next_stream_id(
199    last_stream_id: StreamId,
200    is_client: bool,
201) -> Option<(StreamId, StreamId)> {
202    let next = last_stream_id.checked_add(2)?;
203    let issued = if is_client {
204        next.checked_sub(1)?
205    } else {
206        next.checked_sub(2)?
207    };
208    // RFC 9113 §5.1.1: stream identifiers are 31-bit. Reject any allocation
209    // whose issued value would exceed `STREAM_ID_MAX`; the watermark itself
210    // is allowed to sit at `STREAM_ID_MAX + 1` (the sentinel that fails the
211    // next call).
212    if issued > STREAM_ID_MAX {
213        return None;
214    }
215    // Post-conditions (RFC 9113 §5.1.1):
216    // - the issued id fits the 31-bit space;
217    // - the returned watermark is strictly greater than the id we issued, so a
218    //   subsequent call cannot re-issue or regress;
219    // - role-parity: client ids are odd, server ids even. This holds ONLY when
220    //   `last_stream_id` is an even watermark, which is the helper's documented
221    //   contract and what production always maintains (`create_stream` rounds to
222    //   `(stream_id + 2) & !1`; the connection initialises it to 0). The unit
223    //   tests deliberately feed odd `last` values at the saturation boundary, so
224    //   the parity check is gated on the watermark being even — a parity slip
225    //   from an *even* watermark would let two roles collide on one id.
226    debug_assert!(
227        issued <= STREAM_ID_MAX,
228        "issued stream id must fit the 31-bit space"
229    );
230    debug_assert!(
231        next > issued,
232        "the next watermark must advance strictly past the issued id"
233    );
234    debug_assert!(
235        last_stream_id & 1 != 0 || (issued & 1 == 1) == is_client,
236        "from an even watermark, client ids must be odd and server ids even (RFC 9113 §5.1.1)"
237    );
238    Some((issued, next))
239}
240
241/// Enlarged connection-level receive window (1 MB).
242/// The RFC 9113 default is 65 535 bytes, which is too small for high-throughput
243/// proxying and causes excessive WINDOW_UPDATE round-trips. 1 MB matches the
244/// initial window used by HAProxy, the h2 crate, and other production proxies.
245const ENLARGED_CONNECTION_WINDOW: u32 = 1_048_576;
246
247/// H2 client connection preface size: 24-byte magic + 9-byte SETTINGS frame header
248pub(super) const CLIENT_PREFACE_SIZE: usize = 24 + parser::FRAME_HEADER_SIZE;
249
250// ── Flood Detection Thresholds (CVE mitigations) ────────────────────────────
251
252/// Default maximum RST_STREAM frames per window (CVE-2023-44487 Rapid Reset + CVE-2019-9514)
253const DEFAULT_MAX_RST_STREAM_PER_WINDOW: u32 = 100;
254/// Hard lifetime cap on total RST_STREAM frames received on a single
255/// connection (CVE-2023-44487 Rapid Reset).
256///
257/// The per-window counter half-decays, which allows a patient attacker to
258/// sustain ~50 RST/sec indefinitely — each one costs the backend a request
259/// that will be cancelled before any response work is produced. A lifetime
260/// counter that never decays puts an absolute ceiling on that amplification
261/// per connection. 10 000 is generous for legitimate traffic (months of
262/// occasional client-side cancellations) but rapidly trips on the ~30/sec
263/// abusive pace reported in the CVE-2023-44487 advisory (~5 minutes).
264pub(super) const DEFAULT_MAX_RST_STREAM_LIFETIME: u64 = 10_000;
265/// Hard lifetime cap on RST_STREAM frames received BEFORE the corresponding
266/// backend response has started. These are the cheap-for-client /
267/// expensive-for-us resets that characterise Rapid Reset: the client pays
268/// one RST frame, we pay a round-trip to the backend plus request parsing.
269/// A much lower ceiling kills the attack well before 10 000 lifetime total.
270pub(super) const DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME: u64 = 50;
271/// Absolute lifetime cap on **server-emitted** RST_STREAM frames on a single
272/// connection (CVE-2025-8671 — "MadeYouReset"). Distinct from
273/// [`DEFAULT_MAX_RST_STREAM_LIFETIME`] which caps *received* RSTs
274/// (CVE-2023-44487 Rapid Reset).
275///
276/// MadeYouReset has the server talk itself into flooding: the attacker sends
277/// legitimate-looking frames that force the server to emit RST_STREAM (content
278/// -length mismatch, header parse error, rejected priority, zero-increment
279/// `WINDOW_UPDATE` on an open stream, …). Each forced RST costs the server a
280/// header-decode, kawa buffer setup and frame serialisation; uncapped, it
281/// becomes the same class of DoS as Rapid Reset but with a flipped emission
282/// direction.
283///
284/// 500 is conservative: legitimate traffic very rarely triggers a
285/// server-initiated RST (aside from graceful `NoError` cancels which are not
286/// counted), so crossing 500 on a single connection is a strong abuse signal.
287pub(super) const DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME: u64 = 500;
288/// Default maximum PING frames per window (CVE-2019-9512 Ping Flood)
289const DEFAULT_MAX_PING_PER_WINDOW: u32 = 100;
290/// Absolute lifetime cap on PING frames received on a single connection.
291/// Mirrors DEFAULT_MAX_RST_STREAM_LIFETIME — generous for legitimate
292/// keep-alives but trips on sustained low-rate abuse (CVE-2019-9512).
293const DEFAULT_MAX_PING_LIFETIME: u32 = 10_000;
294/// Default maximum SETTINGS frames per window (CVE-2019-9515 Settings Flood)
295const DEFAULT_MAX_SETTINGS_PER_WINDOW: u32 = 50;
296/// Absolute lifetime cap on SETTINGS frames received on a single connection.
297/// Mirrors DEFAULT_MAX_RST_STREAM_LIFETIME — generous for legitimate
298/// renegotiations but trips on sustained low-rate abuse (CVE-2019-9515).
299const DEFAULT_MAX_SETTINGS_LIFETIME: u32 = 10_000;
300/// Default maximum empty DATA frames per window (CVE-2019-9518 Empty Frames)
301const DEFAULT_MAX_EMPTY_DATA_PER_WINDOW: u32 = 100;
302/// Default maximum connection-level (stream 0) WINDOW_UPDATE frames per
303/// sliding window. Non-zero stream-0 WINDOW_UPDATE frames are otherwise
304/// uncounted by the generic glitch detector — a peer could burn proxy CPU by
305/// sending millions of legal-looking stream-0 WINDOW_UPDATEs. Value mirrors
306/// [`DEFAULT_MAX_EMPTY_DATA_PER_WINDOW`] / [`DEFAULT_MAX_PING_PER_WINDOW`] —
307/// legitimate proxies only need a handful per second.
308const DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW: u32 = 100;
309/// Default maximum CONTINUATION frames per header block (CVE-2024-27316)
310const DEFAULT_MAX_CONTINUATION_FRAMES: u32 = 20;
311/// Maximum accumulated header block size across CONTINUATION frames (64KB)
312pub(super) const MAX_HEADER_LIST_SIZE: usize = 65536;
313/// Default maximum HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE)
314/// accepted from the peer. 64 KB is well above the RFC default of 4 KB
315/// while preventing a malicious peer from advertising up to 4 GB.
316const DEFAULT_MAX_HEADER_TABLE_SIZE: u32 = 65536;
317/// Default maximum number of materialized header fields per request/response —
318/// HPACK fields plus expanded cookie crumbs (RFC 9113 §8.2.3). Bounds the HPACK
319/// indexed-reference "header bomb": each 1-byte indexed reference materializes a
320/// `Pair` of per-entry bookkeeping, so an attacker amplifies wire bytes into
321/// allocation. RFC 9113 §6.5.2's +32-octet/field accounting alone caps this at
322/// ~2048 fields for a 64 KB list; this explicit count cap is the tighter,
323/// upstream-matching defense (cf. nginx `max_headers`, Apache `LimitRequestFields`).
324const DEFAULT_MAX_HEADER_FIELDS: u32 = 128;
325/// Cumulative outbound progress (bytes) a window-stalled stream must drain to
326/// clear its flow-control-stall deadline (M2 cumulative-stall budget). Below
327/// this, a `WINDOW_UPDATE(+1)` drip that trickles a few bytes per idle period
328/// cannot keep the slot alive: the deadline ages out and the reaper
329/// RST(CANCEL)s the stream. Chosen as one max H2 DATA frame payload (16 KiB) —
330/// a legitimate slow-but-steady transfer drains at least one frame per idle
331/// period at any realistic bandwidth, while a drip attacker grants far less. A
332/// `const`, not a config knob: `h2_stream_idle_timeout_seconds` is already the
333/// operator dial for slow-link tolerance, and coupling a second knob invites
334/// misconfiguration (high floor + low deadline = mass false reaps).
335const FC_STALL_CLEAR_FLOOR: usize = 16 * 1024;
336/// RFC 9113 §6.5.2: the size accounted against `SETTINGS_MAX_HEADER_LIST_SIZE`
337/// is the uncompressed name + value octets PLUS a 32-octet overhead per field.
338/// The per-field overhead is what bounds the field count under a fixed byte
339/// budget — omitting it lets a peer materialize ~33× more fields than intended.
340pub(super) const HEADER_FIELD_SIZE_OVERHEAD: usize = 32;
341/// Duration of the sliding window for rate-based flood counters
342const FLOOD_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(1);
343/// Default maximum general anomaly count before triggering ENHANCE_YOUR_CALM
344const DEFAULT_MAX_GLITCH_COUNT: u32 = 100;
345
346/// RFC 9113 §5.1.2: threshold of `REFUSED_STREAM` emissions per
347/// [`BACKPRESSURE_WINDOW_DURATION`] that triggers back-pressure — at this
348/// point we halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` so the
349/// peer throttles its request rate instead of paying the RST round-trip for
350/// every new stream.
351const BACKPRESSURE_REFUSAL_THRESHOLD: u32 = 50;
352/// Sliding window used to detect refusal bursts for SETTINGS back-pressure.
353const BACKPRESSURE_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(60);
354
/// Tunable thresholds for the H2 flood detector.
///
/// Each field has a safe default equal to the matching compile-time
/// constant. When the listener config leaves a value as `None`, that default
/// is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2FloodConfig {
    /// RST_STREAM frames allowed per one-second window
    /// (CVE-2023-44487, CVE-2019-9514).
    pub max_rst_stream_per_window: u32,
    /// PING frames allowed per one-second window (CVE-2019-9512).
    pub max_ping_per_window: u32,
    /// SETTINGS frames allowed per one-second window (CVE-2019-9515).
    pub max_settings_per_window: u32,
    /// Empty DATA frames allowed per one-second window (CVE-2019-9518).
    pub max_empty_data_per_window: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames allowed per sliding
    /// window. Each non-zero stream-0 WINDOW_UPDATE is legal on its own, so
    /// the generic glitch counter never trips; this cap bounds the CPU a peer
    /// can burn by sending millions of them to be parsed and applied to the
    /// flow window.
    pub max_window_update_stream0_per_window: u32,
    /// CONTINUATION frames allowed per header block (CVE-2024-27316).
    pub max_continuation_frames: u32,
    /// Accumulated protocol anomalies tolerated before ENHANCE_YOUR_CALM.
    pub max_glitch_count: u32,
    /// Absolute lifetime cap on RST_STREAM frames received on one connection
    /// (CVE-2023-44487). It never decays, giving the ceiling the per-window
    /// counter cannot provide.
    pub max_rst_stream_lifetime: u64,
    /// Lifetime cap on "abusive" RST_STREAM frames, i.e. those received
    /// before the response has started — the Rapid Reset signature
    /// (CVE-2023-44487).
    pub max_rst_stream_abusive_lifetime: u64,
    /// Absolute lifetime cap on RST_STREAM frames **emitted by the server**
    /// on this connection (CVE-2025-8671 "MadeYouReset"). Only non-`NoError`
    /// resets are counted; graceful cancels are exempt.
    pub max_rst_stream_emitted_lifetime: u64,
    /// Largest accumulated HPACK-decoded header list accepted per request
    /// (SETTINGS_MAX_HEADER_LIST_SIZE, RFC 9113 §6.5.2).
    pub max_header_list_size: u32,
    /// Largest HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE) accepted
    /// from the peer. The value advertised in the peer's SETTINGS frames is
    /// capped here to keep HPACK encoder memory bounded.
    pub max_header_table_size: u32,
    /// Largest number of materialized header fields, checked per HEADERS
    /// block and, independently, per trailers block — HPACK fields plus
    /// expanded cookie crumbs (RFC 9113 §8.2.3). Bounds the HPACK
    /// indexed-reference header bomb, in which many 1-byte indexed references
    /// each materialize a `Pair` of per-entry bookkeeping.
    pub max_header_fields: u32,
}
404
405impl Default for H2FloodConfig {
406    fn default() -> Self {
407        Self {
408            max_rst_stream_per_window: DEFAULT_MAX_RST_STREAM_PER_WINDOW,
409            max_ping_per_window: DEFAULT_MAX_PING_PER_WINDOW,
410            max_settings_per_window: DEFAULT_MAX_SETTINGS_PER_WINDOW,
411            max_empty_data_per_window: DEFAULT_MAX_EMPTY_DATA_PER_WINDOW,
412            max_window_update_stream0_per_window: DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW,
413            max_continuation_frames: DEFAULT_MAX_CONTINUATION_FRAMES,
414            max_glitch_count: DEFAULT_MAX_GLITCH_COUNT,
415            max_rst_stream_lifetime: DEFAULT_MAX_RST_STREAM_LIFETIME,
416            max_rst_stream_abusive_lifetime: DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME,
417            max_rst_stream_emitted_lifetime: DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME,
418            max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
419            max_header_table_size: DEFAULT_MAX_HEADER_TABLE_SIZE,
420            max_header_fields: DEFAULT_MAX_HEADER_FIELDS,
421        }
422    }
423}
424
425impl H2FloodConfig {
426    /// Create a validated config, clamping all thresholds to at least 1.
427    /// Zero thresholds would cause immediate flood detection on any frame.
428    #[allow(clippy::too_many_arguments)]
429    pub fn new(
430        max_rst_stream_per_window: u32,
431        max_ping_per_window: u32,
432        max_settings_per_window: u32,
433        max_empty_data_per_window: u32,
434        max_window_update_stream0_per_window: u32,
435        max_continuation_frames: u32,
436        max_glitch_count: u32,
437        max_rst_stream_lifetime: u64,
438        max_rst_stream_abusive_lifetime: u64,
439        max_rst_stream_emitted_lifetime: u64,
440        max_header_list_size: u32,
441        max_header_table_size: u32,
442        max_header_fields: u32,
443    ) -> Self {
444        let config = Self {
445            max_rst_stream_per_window: max_rst_stream_per_window.max(1),
446            max_ping_per_window: max_ping_per_window.max(1),
447            max_settings_per_window: max_settings_per_window.max(1),
448            max_empty_data_per_window: max_empty_data_per_window.max(1),
449            max_window_update_stream0_per_window: max_window_update_stream0_per_window.max(1),
450            max_continuation_frames: max_continuation_frames.max(1),
451            max_glitch_count: max_glitch_count.max(1),
452            max_rst_stream_lifetime: max_rst_stream_lifetime.max(1),
453            max_rst_stream_abusive_lifetime: max_rst_stream_abusive_lifetime.max(1),
454            max_rst_stream_emitted_lifetime: max_rst_stream_emitted_lifetime.max(1),
455            max_header_list_size: max_header_list_size.max(1),
456            max_header_table_size: max_header_table_size.max(1),
457            max_header_fields: max_header_fields.max(1),
458        };
459        // Post-condition: every threshold is clamped to at least 1. A zero
460        // threshold would make `check_flood`/`record_rst_*` trip on the very
461        // first frame (count > 0 > threshold), turning a legitimate connection
462        // into an immediate GOAWAY. This is the central invariant the clamps
463        // above exist to enforce — assert it rather than trusting the `.max(1)`
464        // chain stays correct under future edits.
465        debug_assert!(
466            config.max_rst_stream_per_window >= 1
467                && config.max_ping_per_window >= 1
468                && config.max_settings_per_window >= 1
469                && config.max_empty_data_per_window >= 1
470                && config.max_window_update_stream0_per_window >= 1
471                && config.max_continuation_frames >= 1
472                && config.max_glitch_count >= 1,
473            "every u32 flood threshold must be clamped to >= 1"
474        );
475        debug_assert!(
476            config.max_rst_stream_lifetime >= 1
477                && config.max_rst_stream_abusive_lifetime >= 1
478                && config.max_rst_stream_emitted_lifetime >= 1
479                && config.max_header_list_size >= 1
480                && config.max_header_table_size >= 1
481                && config.max_header_fields >= 1,
482            "every lifetime/size flood threshold must be clamped to >= 1"
483        );
484        config
485    }
486}
487
/// Default shrink ratio for the stream Vec: it is shrunk once
/// `total > active * ratio`.
const DEFAULT_STREAM_SHRINK_RATIO: u32 = 2;
490
/// Tunable parameters of an H2 connection.
///
/// Every field has a safe default; values missing from the listener config
/// fall back to the compile-time defaults.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2ConnectionConfig {
    /// Size in bytes of the connection-level receive window
    /// (RFC 9113 §6.9.2).
    pub initial_connection_window: u32,
    /// Limit on concurrently open streams (SETTINGS_MAX_CONCURRENT_STREAMS).
    pub max_concurrent_streams: u32,
    /// Ratio threshold that decides when recycled stream slots are shrunk.
    pub stream_shrink_ratio: u32,
}
504
505impl Default for H2ConnectionConfig {
506    fn default() -> Self {
507        Self {
508            initial_connection_window: ENLARGED_CONNECTION_WINDOW,
509            max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
510            stream_shrink_ratio: DEFAULT_STREAM_SHRINK_RATIO,
511        }
512    }
513}
514
515impl H2ConnectionConfig {
516    /// Create a validated config, clamping to safe bounds.
517    ///
518    /// - `initial_connection_window`: clamped to \[65535, 2^31-1\] per RFC 9113 §6.9
519    /// - `max_concurrent_streams`: minimum 1
520    /// - `stream_shrink_ratio`: minimum 2 (1 would defeat slot recycling)
521    pub fn new(
522        initial_connection_window: u32,
523        max_concurrent_streams: u32,
524        stream_shrink_ratio: u32,
525    ) -> Self {
526        let clamped_window =
527            initial_connection_window.clamp(DEFAULT_INITIAL_WINDOW_SIZE, FLOW_CONTROL_MAX_WINDOW);
528        if clamped_window != initial_connection_window {
529            warn!(
530                "{} h2_initial_connection_window {} clamped to [{}, {}]",
531                log_module_context!(),
532                initial_connection_window,
533                DEFAULT_INITIAL_WINDOW_SIZE,
534                FLOW_CONTROL_MAX_WINDOW
535            );
536        }
537        const MAX_SAFE_CONCURRENT_STREAMS: u32 = 10_000;
538        let clamped_streams = max_concurrent_streams.clamp(1, MAX_SAFE_CONCURRENT_STREAMS);
539        if max_concurrent_streams > MAX_SAFE_CONCURRENT_STREAMS {
540            error!(
541                "{} h2_max_concurrent_streams={} exceeds safe limit, clamped to {}",
542                log_module_context!(),
543                max_concurrent_streams,
544                MAX_SAFE_CONCURRENT_STREAMS
545            );
546        }
547        if clamped_streams != max_concurrent_streams
548            && max_concurrent_streams <= MAX_SAFE_CONCURRENT_STREAMS
549        {
550            warn!(
551                "{} h2_max_concurrent_streams {} clamped to minimum 1",
552                log_module_context!(),
553                max_concurrent_streams
554            );
555        }
556        let clamped_ratio = stream_shrink_ratio.max(2);
557        if clamped_ratio != stream_shrink_ratio {
558            warn!(
559                "{} h2_stream_shrink_ratio {} clamped to minimum 2",
560                log_module_context!(),
561                stream_shrink_ratio
562            );
563        }
564        let config = Self {
565            initial_connection_window: clamped_window,
566            max_concurrent_streams: clamped_streams,
567            stream_shrink_ratio: clamped_ratio,
568        };
569        // Post-conditions matching the documented clamp ranges. The window must
570        // stay within RFC 9113 §6.9's [65535, 2^31-1] (a window outside this
571        // band desynchronises flow control with the peer); max_concurrent_streams
572        // must be >= 1 (zero would refuse every stream); shrink_ratio must be
573        // >= 2 (1 defeats slot recycling, the whole point of the knob).
574        debug_assert!(
575            (DEFAULT_INITIAL_WINDOW_SIZE..=FLOW_CONTROL_MAX_WINDOW)
576                .contains(&config.initial_connection_window),
577            "clamped connection window must lie within RFC 9113 §6.9 bounds"
578        );
579        debug_assert!(
580            config.max_concurrent_streams >= 1,
581            "clamped max_concurrent_streams must be >= 1"
582        );
583        debug_assert!(
584            config.stream_shrink_ratio >= 2,
585            "clamped stream_shrink_ratio must be >= 2 to keep slot recycling effective"
586        );
587        config
588    }
589
590    /// Create from optional config values, falling back to compile-time defaults.
591    /// Combines unwrap-or-default with validation clamping.
592    pub fn from_optional(
593        window: Option<u32>,
594        max_streams: Option<u32>,
595        shrink_ratio: Option<u32>,
596    ) -> Self {
597        let defaults = Self::default();
598        Self::new(
599            window.unwrap_or(defaults.initial_connection_window),
600            max_streams.unwrap_or(defaults.max_concurrent_streams),
601            shrink_ratio.unwrap_or(defaults.stream_shrink_ratio),
602        )
603    }
604}
605
/// Pending WINDOW_UPDATE capacity derived from the default stream limit:
/// `1 + 4 * DEFAULT_MAX_CONCURRENT_STREAMS`. Compiled for tests only
/// (`#[cfg(test)]`); the actual per-connection cap is computed from
/// `connection_config.max_concurrent_streams`.
#[cfg(test)]
const DEFAULT_MAX_PENDING_WINDOW_UPDATES: usize = 1 + DEFAULT_MAX_CONCURRENT_STREAMS as usize * 4;

/// Maximum number of pending RST_STREAM frames before triggering GOAWAY.
/// When a peer causes excessive RST_STREAM queueing (e.g. rapid stream creation
/// beyond MAX_CONCURRENT_STREAMS), this cap prevents unbounded memory growth
/// and triggers an ENHANCE_YOUR_CALM connection error.
const MAX_PENDING_RST_STREAMS: usize = 200;

/// RFC 9113 §6.5: maximum time to wait for a SETTINGS ACK (5 seconds) before
/// sending GOAWAY with the SETTINGS_TIMEOUT error code.
const SETTINGS_ACK_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);
620
621#[inline(always)]
622fn error_nom_to_h2(error: nom::Err<parser::ParserError>) -> H2Error {
623    match error {
624        nom::Err::Error(parser::ParserError {
625            kind: parser::ParserErrorKind::H2(e),
626            ..
627        }) => e,
628        nom::Err::Failure(parser::ParserError {
629            kind: parser::ParserErrorKind::H2(e),
630            ..
631        }) => e,
632        _ => H2Error::ProtocolError,
633    }
634}
635
636/// Distribute connection-level byte overhead proportionally to a single stream.
637///
638/// Overhead is distributed in proportion to the bytes this stream transferred
639/// relative to the total across all active streams. A stream that transferred
640/// 60% of total bytes gets 60% of the overhead.
641///
642/// `stream_bytes` and `total_bytes` are `(bytes_in, bytes_out)` tuples.
643/// Falls back to even distribution (1/active_streams) when no stream has
644/// transferred any bytes yet (total is zero).
645///
646/// Extracted as a free function to avoid borrow conflicts when `self` fields
647/// (e.g. `encoder`) are borrowed by the converter while we need to update
648/// per-stream metrics and connection overhead counters.
649fn distribute_overhead(
650    metrics: &mut SessionMetrics,
651    overhead_bin: &mut usize,
652    overhead_bout: &mut usize,
653    stream_bytes: (usize, usize),
654    total_bytes: (usize, usize),
655    active_streams: usize,
656    is_last_stream: bool,
657) {
658    let share_in = if is_last_stream {
659        // Last stream gets all remaining overhead to avoid losing remainder bytes
660        // from integer division across earlier streams.
661        *overhead_bin
662    } else if total_bytes.0 > 0 {
663        // Clamp to remaining overhead — integer division rounding across multiple
664        // streams can cause accumulated shares to exceed the total.
665        (*overhead_bin * stream_bytes.0 / total_bytes.0).min(*overhead_bin)
666    } else {
667        // No stream has transferred any inbound bytes — fall back to even split.
668        *overhead_bin / active_streams.max(1)
669    };
670    let share_out = if is_last_stream {
671        *overhead_bout
672    } else if total_bytes.1 > 0 {
673        (*overhead_bout * stream_bytes.1 / total_bytes.1).min(*overhead_bout)
674    } else {
675        // No stream has transferred any outbound bytes — fall back to even split.
676        *overhead_bout / active_streams.max(1)
677    };
678    // Pre-condition: a stream can never be credited more overhead than remains
679    // in the pool — otherwise the `*overhead_b* -= share_*` below underflows
680    // (usize wraps to a huge value, corrupting connection-overhead accounting).
681    // Every branch above either takes the whole pool (last stream) or `.min`s
682    // against it, so this must hold.
683    debug_assert!(
684        share_in <= *overhead_bin,
685        "overhead-in share must not exceed the remaining overhead pool"
686    );
687    debug_assert!(
688        share_out <= *overhead_bout,
689        "overhead-out share must not exceed the remaining overhead pool"
690    );
691    let before_bin = *overhead_bin;
692    let before_bout = *overhead_bout;
693    metrics.bin += share_in;
694    metrics.bout += share_out;
695    *overhead_bin -= share_in;
696    *overhead_bout -= share_out;
697    // Post-condition: the pool shrinks by exactly the credited share (overhead
698    // is conserved, neither created nor lost). The last stream drains it to 0.
699    debug_assert_eq!(
700        *overhead_bin,
701        before_bin - share_in,
702        "overhead-in pool must decrease by exactly the credited share"
703    );
704    debug_assert_eq!(
705        *overhead_bout,
706        before_bout - share_out,
707        "overhead-out pool must decrease by exactly the credited share"
708    );
709    debug_assert!(
710        !is_last_stream || (*overhead_bin == 0 && *overhead_bout == 0),
711        "the last stream must drain the overhead pool to zero (no lost remainder)"
712    );
713}
714
715/// LIFECYCLE §9 invariant 16 probe: returns `true` if any open stream still
716/// has outbound kawa bytes queued (`back.out` non-empty or `back.blocks`
717/// non-drained).
718///
719/// Used by `finalize_write` to preserve `Ready::WRITABLE` across a voluntary
720/// scheduler yield, and by `has_pending_write_full` to block shutdown-drain
721/// while bytes are still owed to the frontend.
722///
723/// `.get()` rather than direct indexing: an unknown `GlobalStreamId` is
724/// treated as "no pending bytes" rather than panicking — defence-in-depth
725/// against a stream-removal race during shutdown.
726fn any_stream_has_pending_back(
727    streams: &HashMap<StreamId, GlobalStreamId>,
728    context_streams: &[Stream],
729) -> bool {
730    any_stream_id_matches(streams, |gid| {
731        context_streams
732            .get(gid)
733            .is_some_and(|s| !s.back.out.is_empty() || !s.back.blocks.is_empty())
734    })
735}
736
737/// Iteration core of [`any_stream_has_pending_back`], split out so the
738/// invariant-16 dispatch is unit-testable without a full [`Stream`] fixture
739/// (the existing test module only covers `H2FloodDetector`).
740fn any_stream_id_matches<F>(streams: &HashMap<StreamId, GlobalStreamId>, mut probe: F) -> bool
741where
742    F: FnMut(GlobalStreamId) -> bool,
743{
744    streams.values().any(|gid| probe(*gid))
745}
746
747/// Collect the live streams that have exceeded `deadline` under either
748/// per-stream reap guard, deduped so a stream tripping both is reaped (and
749/// access-logged) exactly once. Split out from
750/// [`ConnectionH2::cancel_timed_out_streams`] so the two-guard union is
751/// unit-testable without a full `ConnectionH2` fixture (the existing test
752/// module only fixtures `H2FloodDetector` and `Stream`):
753///
754/// - `last_activity` — bidirectional-silence guard: no DATA/HEADERS in either
755///   direction (the slow-multiplex Slowloris timer).
756/// - `fc_stalled` — outbound-flow-control-starvation guard: a buffered response
757///   that cannot drain because the peer keeps its receive window shut (the
758///   HTTP/2 window-stall / WINDOW_UPDATE-drip vector). This guard is what the
759///   liveness timer misses: an inbound 1-byte DATA drip keeps `last_activity`
760///   warm, but never touches `fc_stalled`.
761///
762/// Streams not in `live_streams` or already in `rst_sent` are skipped. The
763/// returned reason string is the access-log tag for the guard that tripped
764/// first (idle takes precedence on a tie, purely for a stable label).
765fn collect_timed_out_streams(
766    last_activity: &HashMap<StreamId, Instant>,
767    fc_stalled: &HashMap<StreamId, Instant>,
768    live_streams: &HashMap<StreamId, GlobalStreamId>,
769    rst_sent: &HashSet<StreamId>,
770    now: Instant,
771    deadline: std::time::Duration,
772) -> Vec<(StreamId, &'static str)> {
773    let eligible = |sid: StreamId| live_streams.contains_key(&sid) && !rst_sent.contains(&sid);
774    let expired = |t: Instant| now.saturating_duration_since(t) > deadline;
775    let mut seen: HashSet<StreamId> = HashSet::new();
776    let mut out: Vec<(StreamId, &'static str)> = Vec::new();
777    for (&sid, &t) in last_activity {
778        if eligible(sid) && expired(t) && seen.insert(sid) {
779            out.push((sid, "H2::IdleTimeout"));
780        }
781    }
782    for (&sid, &t) in fc_stalled {
783        if eligible(sid) && expired(t) && seen.insert(sid) {
784            out.push((sid, "H2::WindowStall"));
785        }
786    }
787    out
788}
789
790/// True when a stream still has response/upload bytes that could be put on the
791/// wire — headers/body in flight, or a terminated-but-not-fully-flushed buffer.
792/// Deliberately EXCLUDES `is_error()`/`rst_sent`: that disjunct is specific to
793/// the priority-eligibility and write-loop gates (`write_streams`) and must stay
794/// inline there; this 2-clause helper backs ONLY the window-stall arm.
795fn has_sendable_response(kawa: &GenericHttpStream) -> bool {
796    kawa.is_main_phase() || (kawa.is_terminated() && !kawa.is_completed())
797}
798
/// Outcome of the M2 cumulative-stall budget decision for one `write_streams`
/// pass on a window-stalled stream, as returned by
/// [`fc_stall_budget_decision`]. Extracted from the `write_streams` arm so
/// the budget logic is unit-testable without a full `ConnectionH2` fixture
/// (mirrors the [`collect_timed_out_streams`] extraction).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FcStallAction {
    /// Clear both the deadline (`stream_fc_stalled_since`) and the progress
    /// accumulator (`stream_fc_stalled_progress`) for this stream.
    Clear,
    /// Ensure the deadline is armed (WITHOUT refreshing an existing `Instant`)
    /// and set the progress accumulator to `progress`, the cumulative number
    /// of bytes drained so far while the window has stayed blocked.
    Arm { progress: usize },
}
812
813/// Decide what to do with a stream's flow-control-stall deadline + cumulative
814/// progress accumulator on one write pass (M2 cumulative-stall budget).
815///
816/// - A genuinely open send window (`!outbound_window_blocked`) is a real
817///   un-stall → [`FcStallAction::Clear`].
818/// - While the window stays blocked, accumulate this pass's outbound drain
819///   (`consumed`, clamped to `>= 0`) onto `prior_progress`. Once the cumulative
820///   total reaches [`FC_STALL_CLEAR_FLOOR`] (a full DATA frame of real delivery)
821///   → `Clear`; otherwise `Arm` with the running total. A `WINDOW_UPDATE(+1)`
822///   drip adds ~1 byte/pass and never reaches the floor, so the deadline keeps
823///   aging and the reaper eventually fires.
824fn fc_stall_budget_decision(
825    outbound_window_blocked: bool,
826    consumed: i32,
827    prior_progress: Option<usize>,
828) -> FcStallAction {
829    if !outbound_window_blocked {
830        return FcStallAction::Clear;
831    }
832    let progressed = prior_progress
833        .unwrap_or(0)
834        .saturating_add(consumed.max(0) as usize);
835    if progressed >= FC_STALL_CLEAR_FLOOR {
836        FcStallAction::Clear
837    } else {
838        FcStallAction::Arm {
839            progress: progressed,
840        }
841    }
842}
843
844/// Core of [`ConnectionH2::enqueue_rst`], extracted so the RST-queueing
845/// semantics (dedupe, queued-cap counter bump, invariant-15 readiness rearm)
846/// can be unit-tested without building a full `ConnectionH2<Front>` fixture.
847///
848/// Invariants enforced:
849/// - **Dedupe** via `rst_sent`: at most one queued RST per wire stream id.
850///   `HashSet::insert` returns `false` when the id is already present; we
851///   short-circuit on that branch to keep `pending_rst_streams`,
852///   `total_rst_streams_queued` and the wire counts consistent.
853/// - **MadeYouReset queued cap** (`MAX_PENDING_RST_STREAMS`): each freshly
854///   queued RST bumps `total_rst_streams_queued`, which
855///   `flush_pending_control_frames` polices to escalate to
856///   `GOAWAY(ENHANCE_YOUR_CALM)` when exceeded.
857/// - **Invariant 15** (edge-triggered epoll): pair `Ready::WRITABLE` interest
858///   with the event bit so `writable()` is scheduled on the next tick.
859///
860/// Returns `true` when the RST was freshly queued, `false` when the
861/// stream was already in `rst_sent` (the caller asked to RST the same
862/// stream twice — a benign re-entrant idempotency, NOT a new wire
863/// emission). The boolean lets [`ConnectionH2::enqueue_rst`] account
864/// the RST only on the freshly-queued path so duplicate calls do not
865/// inflate the per-error counter or trip the MadeYouReset flood cap
866/// for frames that never reach the wire.
867fn enqueue_rst_into(
868    pending: &mut Vec<(StreamId, H2Error)>,
869    total: &mut usize,
870    rst_sent: &mut HashSet<StreamId>,
871    readiness: &mut Readiness,
872    wire_stream_id: StreamId,
873    error: H2Error,
874) -> bool {
875    let pending_before = pending.len();
876    let total_before = *total;
877    if !rst_sent.insert(wire_stream_id) {
878        // Dedupe short-circuit: the id was already queued/flushed. We must NOT
879        // touch any of the wire-count state, otherwise duplicate calls inflate
880        // the MadeYouReset (CVE-2025-8671) lifetime cap with frames that never
881        // reach the wire.
882        debug_assert!(
883            rst_sent.contains(&wire_stream_id),
884            "dedupe path requires the id to already be present in rst_sent"
885        );
886        debug_assert_eq!(
887            pending.len(),
888            pending_before,
889            "dedupe path must not enqueue a new pending RST"
890        );
891        debug_assert_eq!(
892            *total, total_before,
893            "dedupe path must not bump the queued-RST lifetime counter"
894        );
895        return false;
896    }
897    pending.push((wire_stream_id, error));
898    *total += 1;
899    readiness.arm_writable();
900    // Post-condition: a freshly-queued RST advances both the pending Vec and the
901    // lifetime counter by exactly one, and the id is now tracked for dedupe.
902    debug_assert!(
903        rst_sent.contains(&wire_stream_id),
904        "freshly-queued RST must be recorded in rst_sent for future dedupe"
905    );
906    debug_assert_eq!(
907        pending.len(),
908        pending_before + 1,
909        "a freshly-queued RST must push exactly one pending entry"
910    );
911    debug_assert_eq!(
912        *total,
913        total_before + 1,
914        "a freshly-queued RST must bump the queued-RST lifetime counter by one"
915    );
916    debug_assert_eq!(
917        pending.last().map(|(id, _)| *id),
918        Some(wire_stream_id),
919        "the just-pushed entry must be the requested wire stream id"
920    );
921    true
922}
923
/// Detail of a flood-threshold violation returned by
/// [`H2FloodDetector::check_flood`], [`H2FloodDetector::record_rst_lifetime`]
/// and [`H2FloodDetector::record_rst_emitted`].
///
/// Carrying `(reason, count, threshold)` lets the caller emit a session-scoped
/// log line with full context — the detector itself is connection-agnostic and
/// never logs.
#[derive(Debug, Clone, PartialEq)]
pub struct H2FloodViolation {
    /// HTTP/2 error code to emit on the GOAWAY.
    pub error: H2Error,
    /// Human-readable name of the counter that tripped (e.g. `"RST_STREAM"`).
    pub reason: &'static str,
    /// Statsd metric key emitted by [`ConnectionH2::handle_flood_violation`].
    /// Carried alongside `reason` so a single field maps to both the log line
    /// and the dashboard counter — adding a new violation kind requires
    /// choosing both at the construction site, preventing drift.
    pub metric_key: &'static str,
    /// Observed counter value at the moment of detection.
    pub count: u64,
    /// Configured ceiling that was crossed.
    pub threshold: u64,
}
946
/// Tracks per-connection frame rates to detect and mitigate H2 flood attacks.
///
/// Monitors RST_STREAM (CVE-2023-44487), PING (CVE-2019-9512), SETTINGS (CVE-2019-9515),
/// empty DATA (CVE-2019-9518), and CONTINUATION (CVE-2024-27316) flood patterns,
/// plus stream-0 WINDOW_UPDATE frames, accumulated header-block size and a
/// general glitch counter.
/// When any counter exceeds its threshold, `check_flood()` returns the violation
/// detail so callers can log with connection context before sending GOAWAY.
///
/// Thresholds are configurable via [`H2FloodConfig`], with safe defaults matching
/// the original compile-time constants.
#[derive(Debug)]
pub struct H2FloodDetector {
    /// RST_STREAM frames received in current window (CVE-2023-44487 + CVE-2019-9514)
    pub(super) rst_stream_count: u32,
    /// Lifetime RST_STREAM frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing a sustained ~50 RST/sec burst
    /// from running forever.
    pub(super) total_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames received that targeted a stream whose
    /// backend response had not yet started. These are the "Rapid Reset"
    /// signature — cheap for the attacker, expensive for the proxy — and
    /// trip on a much lower ceiling than the generic lifetime counter.
    pub(super) total_abusive_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames **emitted by the server** on this
    /// connection (CVE-2025-8671 "MadeYouReset" mitigation). Advanced by
    /// [`H2FloodDetector::record_rst_emitted`], which is meant to be called
    /// from [`ConnectionH2::reset_stream`] whenever a non-`NoError` reset
    /// is triggered by an attacker-crafted frame (content-length mismatch,
    /// header parse error, priority rejection, zero-increment WINDOW_UPDATE
    /// on an open stream). Never decays — provides an absolute ceiling that
    /// short-circuits patient-attacker patterns that stay under any windowed
    /// counter.
    pub(super) total_rst_streams_emitted_lifetime: u64,
    /// PING frames received in current window (CVE-2019-9512)
    pub(super) ping_count: u32,
    /// Lifetime PING frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate PING abuse.
    pub(super) total_ping_received_lifetime: u32,
    /// SETTINGS frames received in current window (CVE-2019-9515)
    pub(super) settings_count: u32,
    /// Lifetime SETTINGS frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate SETTINGS abuse.
    pub(super) total_settings_received_lifetime: u32,
    /// Empty DATA frames received in current window (CVE-2019-9518)
    pub(super) empty_data_count: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames received in current
    /// sliding window. Half-decays with `maybe_reset_window` like other
    /// rate counters. Increments on non-zero stream-0 WINDOW_UPDATEs only —
    /// zero-increment frames short-circuit into GOAWAY(PROTOCOL_ERROR) per
    /// RFC 9113 §6.9 before reaching this counter.
    pub(super) window_update_stream0_count: u32,
    /// CONTINUATION frames received for current header block (CVE-2024-27316).
    /// Per header block, not per window: `maybe_reset_window` does not decay it.
    pub(super) continuation_count: u32,
    /// Total accumulated header block size across CONTINUATION frames.
    /// Per header block, not per window: `maybe_reset_window` does not decay it.
    pub(super) accumulated_header_size: u32,
    /// General anomaly counter; half-decays with the other rate counters.
    pub(super) glitch_count: u32,
    /// Window start for rate-based counters; refreshed each time the counters
    /// are half-decayed.
    pub(super) window_start: Instant,
    /// Configurable thresholds for flood detection
    pub(super) config: H2FloodConfig,
}
1013
1014impl Default for H2FloodDetector {
1015    fn default() -> Self {
1016        Self::new(H2FloodConfig::default())
1017    }
1018}
1019
1020impl H2FloodDetector {
1021    pub fn new(config: H2FloodConfig) -> Self {
1022        // Pre-condition: thresholds are already validated (clamped to >= 1 by
1023        // `H2FloodConfig::new`). A zero per-window threshold would trip on the
1024        // first counted frame; assert it here so a config that bypassed `new`
1025        // (raw struct literal in a future caller) is caught in debug.
1026        debug_assert!(
1027            config.max_rst_stream_per_window >= 1
1028                && config.max_ping_per_window >= 1
1029                && config.max_settings_per_window >= 1
1030                && config.max_continuation_frames >= 1
1031                && config.max_glitch_count >= 1,
1032            "flood detector must be constructed with validated (>= 1) thresholds"
1033        );
1034        Self {
1035            rst_stream_count: 0,
1036            total_rst_received_lifetime: 0,
1037            total_abusive_rst_received_lifetime: 0,
1038            total_rst_streams_emitted_lifetime: 0,
1039            ping_count: 0,
1040            total_ping_received_lifetime: 0,
1041            settings_count: 0,
1042            total_settings_received_lifetime: 0,
1043            empty_data_count: 0,
1044            window_update_stream0_count: 0,
1045            continuation_count: 0,
1046            accumulated_header_size: 0,
1047            glitch_count: 0,
1048            window_start: Instant::now(),
1049            config,
1050        }
1051    }
1052
1053    /// Increment the lifetime RST_STREAM counters and return a
1054    /// [`H2FloodViolation`] if either the global or the abusive
1055    /// (pre-response-start) lifetime cap has been exceeded.
1056    ///
1057    /// `response_started` indicates whether the backend response had already
1058    /// begun when the RST arrived; `false` is the cheap-for-client /
1059    /// expensive-for-us Rapid Reset signature (CVE-2023-44487).
1060    pub fn record_rst_lifetime(&mut self, response_started: bool) -> Option<H2FloodViolation> {
1061        let total_before = self.total_rst_received_lifetime;
1062        let abusive_before = self.total_abusive_rst_received_lifetime;
1063        self.total_rst_received_lifetime = self.total_rst_received_lifetime.saturating_add(1);
1064        if !response_started {
1065            self.total_abusive_rst_received_lifetime =
1066                self.total_abusive_rst_received_lifetime.saturating_add(1);
1067        }
1068        // Monotonicity: the global lifetime counter advances by one per call
1069        // (until saturation), and the abusive sub-counter advances iff the RST
1070        // arrived before the backend response started. The abusive counter can
1071        // never exceed the global one — every abusive RST is also a received RST.
1072        debug_assert!(
1073            self.total_rst_received_lifetime >= total_before,
1074            "lifetime RST counter must be monotonic non-decreasing"
1075        );
1076        debug_assert_eq!(
1077            self.total_abusive_rst_received_lifetime > abusive_before,
1078            !response_started,
1079            "abusive RST counter advances iff the RST is pre-response-start"
1080        );
1081        debug_assert!(
1082            self.total_abusive_rst_received_lifetime <= self.total_rst_received_lifetime,
1083            "abusive RST count is a subset of total received RST count"
1084        );
1085        if self.total_rst_received_lifetime > self.config.max_rst_stream_lifetime {
1086            return Some(H2FloodViolation {
1087                error: H2Error::EnhanceYourCalm,
1088                reason: "Rapid Reset: lifetime RST_STREAM",
1089                metric_key: "h2.flood.violation.rst_stream_lifetime",
1090                count: self.total_rst_received_lifetime,
1091                threshold: self.config.max_rst_stream_lifetime,
1092            });
1093        }
1094        if self.total_abusive_rst_received_lifetime > self.config.max_rst_stream_abusive_lifetime {
1095            return Some(H2FloodViolation {
1096                error: H2Error::EnhanceYourCalm,
1097                reason: "Rapid Reset: lifetime pre-response RST_STREAM",
1098                metric_key: "h2.flood.violation.rst_stream_pre_response_lifetime",
1099                count: self.total_abusive_rst_received_lifetime,
1100                threshold: self.config.max_rst_stream_abusive_lifetime,
1101            });
1102        }
1103        None
1104    }
1105
1106    /// Increment the lifetime **server-emitted** RST_STREAM counter and
1107    /// return a [`H2FloodViolation`] once the configured ceiling is exceeded.
1108    ///
1109    /// Call sites are the error paths inside [`ConnectionH2::reset_stream`]
1110    /// where an attacker-crafted frame coerces the server into emitting a
1111    /// RST_STREAM (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
1112    /// are reported — callers must exclude graceful cancels.
1113    pub fn record_rst_emitted(&mut self) -> Option<H2FloodViolation> {
1114        let before = self.total_rst_streams_emitted_lifetime;
1115        self.total_rst_streams_emitted_lifetime =
1116            self.total_rst_streams_emitted_lifetime.saturating_add(1);
1117        // Monotonic: the emitted-RST counter never decays (it is the absolute
1118        // MadeYouReset ceiling, CVE-2025-8671), so each call strictly advances
1119        // it until u64 saturation.
1120        debug_assert!(
1121            self.total_rst_streams_emitted_lifetime > before || before == u64::MAX,
1122            "emitted-RST lifetime counter must advance (or already be saturated)"
1123        );
1124        if self.total_rst_streams_emitted_lifetime > self.config.max_rst_stream_emitted_lifetime {
1125            return Some(H2FloodViolation {
1126                error: H2Error::EnhanceYourCalm,
1127                reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
1128                metric_key: "h2.flood.violation.rst_stream_emitted_lifetime",
1129                count: self.total_rst_streams_emitted_lifetime,
1130                threshold: self.config.max_rst_stream_emitted_lifetime,
1131            });
1132        }
1133        None
1134    }
1135
1136    /// Half-decay rate-based counters if the current window has expired.
1137    /// Uses half-window decay instead of full reset to catch burst-then-wait attacks.
1138    fn maybe_reset_window(&mut self) {
1139        if self.window_start.elapsed() >= FLOOD_WINDOW_DURATION {
1140            let (rst_before, ping_before, settings_before) =
1141                (self.rst_stream_count, self.ping_count, self.settings_count);
1142            let (empty_before, wu0_before, glitch_before) = (
1143                self.empty_data_count,
1144                self.window_update_stream0_count,
1145                self.glitch_count,
1146            );
1147            self.rst_stream_count /= 2;
1148            self.ping_count /= 2;
1149            self.settings_count /= 2;
1150            self.empty_data_count /= 2;
1151            self.window_update_stream0_count /= 2;
1152            self.glitch_count /= 2;
1153            self.window_start = Instant::now();
1154            // Half-decay invariant: each rate-based counter is exactly halved
1155            // (integer division), never increased. Catching burst-then-wait
1156            // attacks relies on the counter shrinking but not vanishing — a
1157            // full reset would let a patient attacker reset to zero each window.
1158            debug_assert_eq!(self.rst_stream_count, rst_before / 2, "RST count halves");
1159            debug_assert_eq!(self.ping_count, ping_before / 2, "PING count halves");
1160            debug_assert_eq!(
1161                self.settings_count,
1162                settings_before / 2,
1163                "SETTINGS count halves"
1164            );
1165            debug_assert_eq!(
1166                self.empty_data_count,
1167                empty_before / 2,
1168                "empty-DATA count halves"
1169            );
1170            debug_assert_eq!(
1171                self.window_update_stream0_count,
1172                wu0_before / 2,
1173                "stream-0 WINDOW_UPDATE count halves"
1174            );
1175            debug_assert_eq!(self.glitch_count, glitch_before / 2, "glitch count halves");
1176            // The lifetime counters are deliberately NOT touched here — they are
1177            // the never-decaying ceilings. Guard against a future edit decaying
1178            // them by accident.
1179            debug_assert!(
1180                self.window_start.elapsed() < FLOOD_WINDOW_DURATION,
1181                "window_start must be refreshed to (approximately) now after decay"
1182            );
1183        }
1184    }
1185
1186    /// Check all flood counters. Returns a [`H2FloodViolation`] when a threshold
1187    /// is exceeded; the caller is responsible for logging with session context
1188    /// and escalating to GOAWAY.
1189    pub fn check_flood(&mut self) -> Option<H2FloodViolation> {
1190        self.maybe_reset_window();
1191
1192        fn flag(
1193            reason: &'static str,
1194            metric_key: &'static str,
1195            count: u32,
1196            threshold: u32,
1197        ) -> Option<H2FloodViolation> {
1198            if count > threshold {
1199                Some(H2FloodViolation {
1200                    error: H2Error::EnhanceYourCalm,
1201                    reason,
1202                    metric_key,
1203                    count: count as u64,
1204                    threshold: threshold as u64,
1205                })
1206            } else {
1207                None
1208            }
1209        }
1210
1211        let violation = flag(
1212            "RST_STREAM",
1213            "h2.flood.violation.rst_stream_window",
1214            self.rst_stream_count,
1215            self.config.max_rst_stream_per_window,
1216        )
1217        .or_else(|| {
1218            flag(
1219                "PING",
1220                "h2.flood.violation.ping_window",
1221                self.ping_count,
1222                self.config.max_ping_per_window,
1223            )
1224        })
1225        .or_else(|| {
1226            flag(
1227                "PING lifetime",
1228                "h2.flood.violation.ping_lifetime",
1229                self.total_ping_received_lifetime,
1230                DEFAULT_MAX_PING_LIFETIME,
1231            )
1232        })
1233        .or_else(|| {
1234            flag(
1235                "SETTINGS",
1236                "h2.flood.violation.settings_window",
1237                self.settings_count,
1238                self.config.max_settings_per_window,
1239            )
1240        })
1241        .or_else(|| {
1242            flag(
1243                "SETTINGS lifetime",
1244                "h2.flood.violation.settings_lifetime",
1245                self.total_settings_received_lifetime,
1246                DEFAULT_MAX_SETTINGS_LIFETIME,
1247            )
1248        })
1249        .or_else(|| {
1250            flag(
1251                "empty DATA",
1252                "h2.flood.violation.empty_data_window",
1253                self.empty_data_count,
1254                self.config.max_empty_data_per_window,
1255            )
1256        })
1257        .or_else(|| {
1258            flag(
1259                "CONTINUATION",
1260                "h2.flood.violation.continuation_per_block",
1261                self.continuation_count,
1262                self.config.max_continuation_frames,
1263            )
1264        })
1265        .or_else(|| {
1266            flag(
1267                "WINDOW_UPDATE stream 0",
1268                "h2.flood.violation.window_update_stream0_window",
1269                self.window_update_stream0_count,
1270                self.config.max_window_update_stream0_per_window,
1271            )
1272        })
1273        .or_else(|| {
1274            flag(
1275                "accumulated header size",
1276                "h2.flood.violation.header_size_per_block",
1277                self.accumulated_header_size,
1278                self.config.max_header_list_size,
1279            )
1280        })
1281        .or_else(|| {
1282            flag(
1283                "glitch",
1284                "h2.flood.violation.glitch_window",
1285                self.glitch_count,
1286                self.config.max_glitch_count,
1287            )
1288        });
1289        // Post-condition: any reported violation is well-formed — every H2
1290        // flood escalation is an ENHANCE_YOUR_CALM connection error, and the
1291        // observed count strictly exceeds the threshold it tripped (the `flag`
1292        // helper and the lifetime checks all use strict `>`). A violation whose
1293        // count <= threshold would be a false positive terminating a healthy
1294        // connection.
1295        debug_assert!(
1296            violation
1297                .as_ref()
1298                .is_none_or(|v| v.error == H2Error::EnhanceYourCalm && v.count > v.threshold),
1299            "a flood violation must be EnhanceYourCalm with count strictly above threshold"
1300        );
1301        violation
1302    }
1303
1304    /// Reset CONTINUATION-specific counters when a header block is complete.
1305    pub fn reset_continuation(&mut self) {
1306        self.continuation_count = 0;
1307        self.accumulated_header_size = 0;
1308        // Post-condition: both CONTINUATION-block accumulators are cleared so
1309        // the next header block starts from zero (CVE-2024-27316 per-block
1310        // accounting must not leak across blocks).
1311        debug_assert_eq!(
1312            self.continuation_count, 0,
1313            "continuation_count must be zero after a block completes"
1314        );
1315        debug_assert_eq!(
1316            self.accumulated_header_size, 0,
1317            "accumulated_header_size must be zero after a block completes"
1318        );
1319    }
1320}
1321
/// State of the H2 connection state machine, stored in `ConnectionH2::state`.
///
/// A freshly constructed connection starts in [`H2State::ClientPreface`].
/// [`H2State::GoAway`] and [`H2State::Error`] are the two states the shutdown
/// check `peer_gone_after_final_goaway` treats as "final GOAWAY queued".
///
/// NOTE(review): the per-variant notes below are inferred from the variant
/// names and payload types only — confirm against the state transitions in
/// the readable/writable paths.
#[derive(Debug)]
pub enum H2State {
    /// Initial state set by the constructor.
    ClientPreface,
    /// Presumably: waiting for the client's initial SETTINGS frame.
    ClientSettings,
    /// Presumably: waiting for the server's initial SETTINGS frame.
    ServerSettings,
    /// Presumably: waiting for the next frame header.
    Header,
    /// Presumably: a frame header was parsed and its payload is expected.
    Frame(FrameHeader),
    /// Presumably: mid header block, waiting for a CONTINUATION frame header.
    ContinuationHeader(Headers),
    /// Presumably: mid header block, waiting for a CONTINUATION frame payload.
    ContinuationFrame(Headers),
    /// Terminal for shutdown purposes (see `peer_gone_after_final_goaway`).
    GoAway,
    /// Terminal for shutdown purposes (see `peer_gone_after_final_goaway`).
    Error,
    /// Presumably: the current frame payload is being skipped.
    Discard,
}
1335
/// One endpoint's view of the HTTP/2 SETTINGS parameters.
///
/// `ConnectionH2` keeps two instances: `local_settings` (what we advertise)
/// and `peer_settings` (what the peer advertised). Each field mirrors the
/// SETTINGS identifier of the same name.
#[derive(Debug, Clone, Copy)]
pub struct H2Settings {
    /// `SETTINGS_HEADER_TABLE_SIZE`: HPACK dynamic table size limit.
    pub settings_header_table_size: u32,
    /// `SETTINGS_ENABLE_PUSH`: whether server push is permitted.
    pub settings_enable_push: bool,
    /// `SETTINGS_MAX_CONCURRENT_STREAMS`: concurrent stream limit.
    pub settings_max_concurrent_streams: u32,
    /// `SETTINGS_INITIAL_WINDOW_SIZE`: initial per-stream flow-control window.
    pub settings_initial_window_size: u32,
    /// `SETTINGS_MAX_FRAME_SIZE`: largest frame payload accepted.
    pub settings_max_frame_size: u32,
    /// `SETTINGS_MAX_HEADER_LIST_SIZE`: advisory header list size limit.
    pub settings_max_header_list_size: u32,
    /// `SETTINGS_ENABLE_CONNECT_PROTOCOL` (RFC 8441 extended CONNECT).
    pub settings_enable_connect_protocol: bool,
    /// `SETTINGS_NO_RFC7540_PRIORITIES` (RFC 9218): the RFC 7540 priority
    /// scheme is not used.
    pub settings_no_rfc7540_priorities: bool,
}
1349
1350impl Default for H2Settings {
1351    fn default() -> Self {
1352        Self {
1353            settings_header_table_size: DEFAULT_HEADER_TABLE_SIZE,
1354            settings_enable_push: false,
1355            settings_max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
1356            settings_initial_window_size: DEFAULT_INITIAL_WINDOW_SIZE,
1357            settings_max_frame_size: DEFAULT_MAX_FRAME_SIZE,
1358            settings_max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
1359            settings_enable_connect_protocol: false,
1360            settings_no_rfc7540_priorities: true,
1361        }
1362    }
1363}
1364
/// RFC 9218 Extensible Priorities for HTTP stream scheduling.
///
/// Stores per-stream urgency (0-7, lower = more important) and incremental
/// flag. Used by `writable()` to sort streams: lower urgency first, then
/// stream ID for stability among same-urgency non-incremental streams.
///
/// Within a same-urgency bucket the scheduler (see
/// [`ConnectionH2::write_streams`]) drains non-incremental streams
/// sequentially, then applies RFC 9218 §4 round-robin to the incremental
/// streams starting from [`Self::incremental_cursor`], so multiple concurrent
/// downloads at the same urgency interleave their DATA frames fairly.
///
/// Streams without an explicit `priority` header get the RFC 9218 defaults:
/// urgency 3, incremental false (see [`Self::get`], which supplies these
/// defaults for any stream ID missing from the map).
#[derive(Default)]
pub struct Prioriser {
    /// Per-stream priority: stream_id -> (urgency 0-7, incremental flag).
    ///
    /// The urgency is clamped to 7 on insert and the map is capped at
    /// `MAX_PRIORITIES` entries (both enforced by [`Self::push_priority`]).
    priorities: HashMap<StreamId, (u8, bool)>,
    /// RFC 9218 §4 round-robin cursor: stream ID that fired first in the
    /// last write pass over the incremental tail of the lowest-urgency
    /// bucket that contained at least one incremental stream. The next pass
    /// starts from the stream immediately after this ID (wrapping around),
    /// so a single slow-draining stream cannot hog the connection.
    ///
    /// `0` is the "no cursor yet" sentinel and means "start from the
    /// smallest ID in the bucket" — H2 stream IDs are always > 0.
    /// Only written by [`Self::advance_incremental_cursor`].
    incremental_cursor: StreamId,
}
1393
/// RFC 9218 §4.1 default urgency value, returned by `Prioriser::get` for any
/// stream that has no stored priority entry.
const DEFAULT_URGENCY: u8 = 3;

/// Maximum entries in the priority map to prevent flooding via PRIORITY frames.
/// Enforced on insert by `Prioriser::push_priority`; updates to an existing
/// entry are always allowed.
const MAX_PRIORITIES: usize = 4096;

/// Small look-ahead window (in stream IDs) for PRIORITY frames that arrive
/// slightly before the peer opens the corresponding stream. RFC 9218 allows
/// PRIORITY to be sent for an idle stream that the peer intends to open
/// soon. Past this budget we assume the ID will never be used and drop the
/// entry, preventing flooding with far-future stream IDs.
///
/// The window is counted in raw stream IDs above `last_stream_id`
/// (inclusive upper bound, saturating at the maximum ID value).
const PRIORITY_IDLE_LOOKAHEAD: u32 = 64;
1406
1407impl Prioriser {
1408    /// Record or update the priority for a stream that we know exists or are
1409    /// currently processing (used from pkawa's header-handling path where the
1410    /// owning stream's HEADERS frame is being decoded).
1411    ///
1412    /// Returns `true` if the priority is invalid (self-dependency for RFC 7540),
1413    /// signalling the caller should reset the stream with a protocol error.
1414    pub fn push_priority(&mut self, stream_id: StreamId, priority: parser::PriorityPart) -> bool {
1415        trace!(
1416            "{} PRIORITY REQUEST FOR {}: {:?}",
1417            log_module_context!(),
1418            stream_id,
1419            priority
1420        );
1421        // Pre-condition: the priority map never grows past MAX_PRIORITIES.
1422        // The cap is the only thing standing between a PRIORITY flood and
1423        // unbounded memory; assert it holds on entry (each insert path below
1424        // either updates an existing key or is gated by this check).
1425        debug_assert!(
1426            self.priorities.len() <= MAX_PRIORITIES,
1427            "priority map must never exceed MAX_PRIORITIES entries"
1428        );
1429        // Cap the priority map to prevent flooding via PRIORITY frames
1430        if !self.priorities.contains_key(&stream_id) && self.priorities.len() >= MAX_PRIORITIES {
1431            return false;
1432        }
1433        match priority {
1434            parser::PriorityPart::Rfc7540 {
1435                stream_dependency,
1436                weight: _,
1437            } => {
1438                // RFC 9113 §5.3.1: a stream cannot depend on itself; signal
1439                // the caller to RST_STREAM with PROTOCOL_ERROR. Otherwise the
1440                // RFC 7540 priority tree is deprecated and silently ignored.
1441                stream_dependency.stream_id == stream_id
1442            }
1443            parser::PriorityPart::Rfc9218 {
1444                urgency,
1445                incremental,
1446            } => {
1447                // RFC 9218 §7.1: a malformed or out-of-range priority field
1448                // MUST be "treated as absent", NOT as a stream error. Clamping
1449                // an urgency > 7 to 7 is the policy-correct interpretation:
1450                // the field is still present (so defaulting would lose
1451                // information) but its value is normalised to the RFC's
1452                // allowed range [0..=7]. Intentionally not PROTOCOL_ERROR.
1453                self.priorities
1454                    .insert(stream_id, (urgency.min(7), incremental));
1455                // Post-conditions: the entry now exists with a clamped urgency
1456                // in [0, 7] (the writable scheduler buckets by urgency and would
1457                // mis-order on a value above 7), and the map stays within its
1458                // memory cap.
1459                debug_assert!(
1460                    self.priorities
1461                        .get(&stream_id)
1462                        .is_some_and(|(u, _)| *u <= 7),
1463                    "stored RFC 9218 urgency must be clamped to [0, 7]"
1464                );
1465                debug_assert!(
1466                    self.priorities.len() <= MAX_PRIORITIES,
1467                    "priority map must stay within MAX_PRIORITIES after insert"
1468                );
1469                false
1470            }
1471        }
1472    }
1473
1474    /// Record or update the priority for a stream ID that arrived via a
1475    /// standalone PRIORITY frame.
1476    ///
1477    /// Pass 3 Medium #4: without this guard, a peer could send PRIORITY for
1478    /// arbitrary stream IDs (e.g. 2^31 ever-increasing IDs) and pin up to
1479    /// `MAX_PRIORITIES` entries of memory. Accept only:
1480    /// - an ID that corresponds to a currently-open stream (`open_streams`);
1481    /// - an idle ID slightly ahead of `last_stream_id` (within
1482    ///   [`PRIORITY_IDLE_LOOKAHEAD`]), matching RFC 9218's "set priority for
1483    ///   a stream about to be opened" pattern.
1484    ///
1485    /// IDs in the past that we do not currently track (already closed) and
1486    /// IDs too far in the future are silently dropped. The `MAX_PRIORITIES`
1487    /// ceiling is preserved as a defensive backstop if both filters are ever
1488    /// circumvented.
1489    ///
1490    /// Returns the same value semantics as [`Self::push_priority`].
1491    pub fn push_priority_guarded(
1492        &mut self,
1493        stream_id: StreamId,
1494        priority: parser::PriorityPart,
1495        last_stream_id: StreamId,
1496        open_streams: &HashMap<StreamId, GlobalStreamId>,
1497    ) -> bool {
1498        if !self.is_acceptable(stream_id, last_stream_id, open_streams) {
1499            trace!(
1500                "{} PRIORITY dropped for unknown/far stream {} (last_stream_id={})",
1501                log_module_context!(),
1502                stream_id,
1503                last_stream_id
1504            );
1505            return false;
1506        }
1507        self.push_priority(stream_id, priority)
1508    }
1509
1510    fn is_acceptable(
1511        &self,
1512        stream_id: StreamId,
1513        last_stream_id: StreamId,
1514        open_streams: &HashMap<StreamId, GlobalStreamId>,
1515    ) -> bool {
1516        if open_streams.contains_key(&stream_id) {
1517            return true;
1518        }
1519        // Idle stream ahead of the current counter: accept a small look-ahead.
1520        // Past IDs that are NOT in `open_streams` are closed — drop them.
1521        let upper = last_stream_id.saturating_add(PRIORITY_IDLE_LOOKAHEAD);
1522        stream_id > last_stream_id && stream_id <= upper
1523    }
1524
1525    /// Remove a stream's priority entry (called when the stream is recycled).
1526    pub fn remove(&mut self, stream_id: &StreamId) {
1527        let had = self.priorities.contains_key(stream_id);
1528        let before = self.priorities.len();
1529        self.priorities.remove(stream_id);
1530        // Post-conditions: the entry is truly gone, and the map shrinks by
1531        // exactly one iff it was present. A leak here re-introduces the
1532        // PRIORITY-flood memory exposure the cap defends against.
1533        debug_assert!(
1534            !self.priorities.contains_key(stream_id),
1535            "remove must evict the priority entry"
1536        );
1537        debug_assert_eq!(
1538            self.priorities.len(),
1539            before - had as usize,
1540            "priority map length drops by exactly one iff the id was present"
1541        );
1542    }
1543
1544    /// Look up the priority for a stream, returning RFC 9218 defaults if absent.
1545    #[inline]
1546    pub fn get(&self, stream_id: &StreamId) -> (u8, bool) {
1547        self.priorities
1548            .get(stream_id)
1549            .copied()
1550            .unwrap_or((DEFAULT_URGENCY, false))
1551    }
1552
1553    /// Reorder a pre-sorted slice of writable stream IDs so that inside each
1554    /// urgency bucket, incremental streams appear after non-incremental ones,
1555    /// and the incremental tail is rotated by [`Self::incremental_cursor`]
1556    /// (RFC 9218 §4).
1557    ///
1558    /// The input `buf` must already be sorted by `(urgency, stream_id)`:
1559    /// this routine only partitions and rotates inside same-urgency
1560    /// contiguous runs, it does not re-sort.
1561    ///
1562    /// Returns the total number of incremental streams seen, so callers that
1563    /// need to update the cursor at the end of the write pass can early-exit
1564    /// when the count is zero.
1565    pub fn apply_incremental_rotation(&self, buf: &mut [StreamId]) -> usize {
1566        // Pre-condition: callers must hand a slice already sorted by urgency so
1567        // same-urgency runs are contiguous (this routine only partitions/rotates
1568        // within a run, it does not re-sort across urgencies). A non-monotonic
1569        // urgency sequence would split one logical bucket into several and
1570        // mis-schedule the round-robin. `windows(2)` over a slice of size N is
1571        // dead code in release.
1572        #[cfg(debug_assertions)]
1573        debug_assert!(
1574            buf.windows(2)
1575                .all(|w| self.get(&w[0]).0 <= self.get(&w[1]).0),
1576            "apply_incremental_rotation requires input pre-sorted by urgency"
1577        );
1578        let len_before = buf.len();
1579        #[cfg(debug_assertions)]
1580        let expected_incremental = buf.iter().filter(|id| self.get(id).1).count();
1581        let mut total_incremental = 0usize;
1582        let mut i = 0;
1583        while i < buf.len() {
1584            let (urgency_i, _) = self.get(&buf[i]);
1585            let mut j = i + 1;
1586            while j < buf.len() {
1587                let (urgency_j, _) = self.get(&buf[j]);
1588                if urgency_j != urgency_i {
1589                    break;
1590                }
1591                j += 1;
1592            }
1593            // `buf[i..j]` is a contiguous run of same-urgency stream IDs.
1594            let bucket = &mut buf[i..j];
1595            if bucket.len() > 1 {
1596                // Stable partition: non-incremental first, incremental last,
1597                // each subrange staying in ascending stream-id order.
1598                bucket.sort_by_key(|id| self.get(id).1);
1599                let split = bucket.partition_point(|id| !self.get(id).1);
1600                let incremental_tail = &mut bucket[split..];
1601                if incremental_tail.len() > 1 {
1602                    // Rotate so the pass starts right after the stream that
1603                    // fired first previously. `partition_point` returns the
1604                    // first index whose stream ID > cursor (so cursor itself
1605                    // is still drained, but after the streams ahead of it).
1606                    let start =
1607                        incremental_tail.partition_point(|id| *id <= self.incremental_cursor);
1608                    incremental_tail.rotate_left(start);
1609                }
1610                total_incremental += incremental_tail.len();
1611            } else if bucket.len() == 1 && self.get(&bucket[0]).1 {
1612                total_incremental += 1;
1613            }
1614            i = j;
1615        }
1616        // Post-conditions: the routine is a permutation — it reorders in place
1617        // and never drops a stream id (len unchanged), and the returned count is
1618        // exactly the number of incremental streams present (the cursor-advance
1619        // callers rely on this being the true incremental-tail size).
1620        debug_assert_eq!(
1621            buf.len(),
1622            len_before,
1623            "rotation must preserve the slice (no streams dropped or added)"
1624        );
1625        #[cfg(debug_assertions)]
1626        debug_assert_eq!(
1627            total_incremental, expected_incremental,
1628            "reported incremental count must equal the incremental streams in buf"
1629        );
1630        total_incremental
1631    }
1632
1633    /// Advance the RFC 9218 §4 round-robin cursor after a write pass.
1634    ///
1635    /// `first_incremental_fired` is the stream ID that headed the incremental
1636    /// tail we just drained; the next pass will start at the next stream
1637    /// after that ID. Callers may pass `None` when no incremental streams
1638    /// were eligible, leaving the cursor where it was.
1639    pub fn advance_incremental_cursor(&mut self, first_incremental_fired: Option<StreamId>) {
1640        if let Some(id) = first_incremental_fired {
1641            self.incremental_cursor = id;
1642        }
1643    }
1644}
1645
/// Connection-level flow control state (RFC 9113 §6.9).
///
/// Groups the send window, the receive-side byte counter and the queue of
/// WINDOW_UPDATE increments waiting to be written.
pub struct H2FlowControl {
    /// Connection-level send window (can go negative per RFC 9113 §6.9.2).
    /// Initialised by the constructor from `DEFAULT_INITIAL_WINDOW_SIZE`.
    pub window: i32,
    /// Bytes received since last connection-level WINDOW_UPDATE.
    pub received_bytes_since_update: u32,
    /// Queued stream_id -> accumulated increment for WINDOW_UPDATE frames (O(1) coalescing).
    pub pending_window_updates: HashMap<u32, u32>,
}
1655
/// Byte accounting for connection overhead attribution.
///
/// NOTE(review): how and when these counters are flushed into per-stream or
/// session metrics is not visible from this definition — confirm at the
/// call sites before relying on their exact semantics.
pub struct H2ByteAccounting {
    /// Bytes read on the zero stream not yet attributed to a stream.
    pub zero_bytes_read: usize,
    /// Overhead bytes received (connection-level frames).
    pub overhead_bin: usize,
    /// Overhead bytes sent (connection-level frames).
    pub overhead_bout: usize,
}
1665
/// Connection draining state for graceful shutdown.
///
/// `draining` is the flag consulted by the frontend hang-up checks; the two
/// timing fields together decide when a proxy-initiated drain is force-closed.
pub struct H2DrainState {
    /// True when we've sent GOAWAY and are draining.
    pub draining: bool,
    /// Last stream ID from peer's GOAWAY (for retry decisions).
    /// `None` until a peer GOAWAY has been recorded.
    pub peer_last_stream_id: Option<StreamId>,
    /// Wall-clock timestamp captured the first time this connection entered
    /// `draining` during soft-stop. Used together with
    /// [`Self::graceful_shutdown_deadline`] to decide when to force-close.
    /// Remains `None` until the proxy-initiated drain begins (peer-initiated
    /// drains via `handle_goaway_frame` don't arm the forced-close timer —
    /// the caller in `Mux::shutting_down` is the only writer).
    pub started_at: Option<Instant>,
    /// Wall-clock budget granted to in-flight streams after the initial
    /// `GOAWAY(NO_ERROR)`. `None` means "wait indefinitely" (knob value `0`).
    /// Default when unset upstream: 5 s (see `L7ListenerHandler`).
    pub graceful_shutdown_deadline: Option<std::time::Duration>,
}
1684
/// Wire-side state of a single HTTP/2 connection (RFC 9113).
///
/// Holds the HPACK encoder/decoder, both endpoints' settings, flow-control
/// and drain state, flood detection, and the per-stream liveness/stall
/// bookkeeping. Built through the shared constructor `new`, which serves
/// both the server and the client position.
pub struct ConnectionH2<Front: SocketHandler> {
    /// Connection/session ULID propagated from the parent [`Mux`]. Used to
    /// stamp the session slot of the `[session req cluster backend]` log
    /// prefix emitted by this module's `log_context!` / `log_context_stream!`
    /// macros.
    pub session_ulid: Ulid,
    /// HPACK decoder for inbound header blocks. The constructor caps its
    /// dynamic table size at `local_settings.settings_header_table_size`
    /// (RFC 7541 §4.2).
    pub decoder: loona_hpack::Decoder<'static>,
    /// HPACK encoder for outbound header blocks.
    pub encoder: loona_hpack::Encoder<'static>,
    /// Pending read expectation, supplied by the constructor's caller.
    /// NOTE(review): presumably (target stream, number of bytes expected) —
    /// confirm against the readable path.
    pub expect_read: Option<(H2StreamId, usize)>,
    /// Pending write expectation; starts as `None`. The shutdown check
    /// `peer_gone_after_final_goaway` requires it to be `None`.
    /// NOTE(review): presumably the stream whose write is in progress —
    /// confirm against the writable path.
    pub expect_write: Option<H2StreamId>,
    /// Starts at 0.
    /// NOTE(review): presumably the latest stream ID seen/allocated on this
    /// connection — confirm how it differs from `highest_peer_stream_id`.
    pub last_stream_id: StreamId,
    /// Settings we advertise: [`H2Settings::default`] with
    /// `settings_max_concurrent_streams` overridden from `connection_config`.
    pub local_settings: H2Settings,
    /// Settings advertised by the peer; starts at [`H2Settings::default`].
    pub peer_settings: H2Settings,
    /// Whether this connection acts as server (frontend) or client.
    pub position: Position,
    /// RFC 9218 per-stream priorities and round-robin cursor.
    pub prioriser: Prioriser,
    /// Event-loop readiness: the interest mask comes from the constructor's
    /// `readiness_interest`, the event set starts empty.
    pub readiness: Readiness,
    /// Underlying socket handler.
    pub socket: Front,
    /// Current state machine state; starts at [`H2State::ClientPreface`].
    pub state: H2State,
    /// Open streams on this connection: wire stream ID -> [`GlobalStreamId`].
    pub streams: HashMap<StreamId, GlobalStreamId>,
    /// Timeout handle supplied by the constructor's caller.
    pub timeout_container: TimeoutContainer,
    /// Connection-level flow control state (send window, receive tracking, pending updates).
    pub flow_control: H2FlowControl,
    /// Highest stream ID accepted from the peer (used for GoAway last_stream_id).
    pub highest_peer_stream_id: StreamId,
    /// RFC 7541 §4.2 / §6.3 pending dynamic-table-size-update signal.
    ///
    /// `Some(new_size)` when a peer SETTINGS frame adjusted
    /// `SETTINGS_HEADER_TABLE_SIZE` and we have not yet prepended the
    /// matching `001xxxxx` HPACK directive to a header block. Consumed and
    /// cleared by [`H2BlockConverter::emit_pending_size_update_if_new_block`]
    /// on the next `Block::StatusLine` or `Block::Header` encoded for the
    /// connection. Until then the peer's decoder still has its previous
    /// (possibly larger) table cap, so emitting is a correctness
    /// requirement, not a nicety — see the RFC 9113 encoder-decoder
    /// synchronisation contract (§6.5.2).
    pub pending_table_size_update: Option<u32>,
    /// Reusable buffer for HPACK-encoded headers in the H2 block converter.
    pub converter_buf: Vec<u8>,
    /// Reusable buffer for lowercasing header keys in the H2 block converter.
    pub lowercase_buf: Vec<u8>,
    /// Reusable buffer for assembling cookie values in the H2 block converter.
    pub cookie_buf: Vec<u8>,
    /// Connection draining state for graceful shutdown.
    pub drain: H2DrainState,
    /// Connection-level ("zero") stream. Its `storage` must be empty for
    /// `peer_gone_after_final_goaway` to report that nothing is left to send.
    /// NOTE(review): presumably the buffer for stream-0 control frames —
    /// confirm against the readable/writable paths.
    pub zero: GenericHttpStream,
    /// Byte accounting for connection overhead attribution.
    pub bytes: H2ByteAccounting,
    /// Flood detector for CVE mitigations (Rapid Reset, CONTINUATION, Ping, Settings floods).
    pub flood_detector: H2FloodDetector,
    /// RFC 9113 §6.5: timestamp when we sent SETTINGS and are awaiting ACK.
    /// If the peer does not ACK within SETTINGS_ACK_TIMEOUT, we send GOAWAY
    /// with SettingsTimeout error.
    pub settings_sent_at: Option<Instant>,
    /// Queued RST_STREAM frames to send: Vec<(stream_id, error_code)>.
    /// Used when refusing streams (MAX_CONCURRENT_STREAMS, buffer exhaustion)
    /// during readable — the actual write happens in the writable preamble
    /// to avoid conflicting with kawa.storage usage for frame payload discard.
    pub pending_rst_streams: Vec<(StreamId, H2Error)>,
    /// RFC 9113 §6.8: tracks stream IDs for which RST_STREAM has already been sent,
    /// preventing duplicate RST_STREAM frames on the wire.
    pub rst_sent: HashSet<StreamId>,
    /// Lifetime counter of RST_STREAM frames queued (pending + already flushed).
    /// Used to detect sustained misbehavior even when writable() drains the
    /// pending queue between readable() calls.
    pub total_rst_streams_queued: usize,
    /// Reusable buffer for priority-sorted stream IDs in write_streams().
    /// Cleared and reused each call to avoid per-frame allocation.
    priorities_buf: Vec<StreamId>,
    /// True once we've asked rustls to emit TLS close_notify for this frontend.
    close_notify_sent: bool,
    /// Per-listener H2 connection tuning (window size, max streams, shrink ratio).
    pub connection_config: H2ConnectionConfig,
    /// Maximum pending WINDOW_UPDATE entries before dropping.
    /// Derived from `connection_config.max_concurrent_streams` at construction.
    max_pending_window_updates: usize,
    /// Last `(connection_window, active_streams, pending_window_updates)` snapshot
    /// emitted by [`Self::gauge_connection_state`]. The snapshot represents this
    /// connection's *contribution* to the three `h2.connection.*` aggregate
    /// gauges; each call emits the signed delta against this snapshot via
    /// [`gauge_add!`] so the gauge sums across connections.
    ///
    /// Stays `None` until the first emission. [`Drop`] applies the negative of
    /// this snapshot so the connection's contribution is always rebalanced to
    /// zero on teardown — independent of which close path runs.
    last_gauge_snapshot: Option<(usize, usize, usize)>,
    /// Per-stream wall-clock timestamp of last meaningful activity (DATA or
    /// HEADERS frame receipt). Used to cancel streams that make no forward
    /// progress within [`Self::stream_idle_timeout`] — mitigates slow-multiplex
    /// Slowloris: connection-level idle timers reset on every frame, so a
    /// misbehaving peer can otherwise pin up to `max_concurrent_streams` slots
    /// for the full nominal connection timeout.
    ///
    /// Initialized when the stream is created and refreshed on each non-empty
    /// inbound DATA frame and on HEADERS for an existing stream (trailers).
    /// Empty DATA frames (CVE-2019-9518 vector) do NOT refresh the timer.
    pub stream_last_activity_at: HashMap<StreamId, Instant>,
    /// Per-stream timestamp of when the stream first became flow-control-stalled
    /// on the OUTBOUND (response) side — it holds buffered response data it
    /// cannot drain because its effective send window `min(stream.window,
    /// connection.window)` is exhausted (the HTTP/2 window-stall /
    /// WINDOW_UPDATE-drip vector). Distinct from [`Self::stream_last_activity_at`]:
    /// this map is armed/cleared ONLY by outbound flow-control progress and is
    /// NEVER refreshed by inbound DATA/HEADERS or connection-level frames, so a
    /// peer dribbling 1-byte DATA on a stalled stream cannot keep it warm (the
    /// liveness timer alone misses this because inbound drips refresh it). Reaped
    /// by [`Self::cancel_timed_out_streams`] after [`Self::stream_idle_timeout`].
    pub stream_fc_stalled_since: HashMap<StreamId, Instant>,
    /// Cumulative outbound flow-control bytes drained on a window-stalled stream
    /// SINCE its [`Self::stream_fc_stalled_since`] deadline was armed (M2
    /// cumulative-stall budget). An entry exists IFF `stream_fc_stalled_since`
    /// has one for the stream; the two maps are kept in lockstep at every
    /// arm/clear/evict site. Closes the `WINDOW_UPDATE(+1)`-drip residual: a
    /// 1-byte drain no longer clears the deadline — only cumulative progress
    /// reaching [`FC_STALL_CLEAR_FLOOR`] does.
    pub stream_fc_stalled_progress: HashMap<StreamId, usize>,
    /// Per-stream idle cap. Streams with no activity for longer than this are
    /// RST_STREAM(CANCEL)'d by [`Self::cancel_timed_out_streams`].
    pub stream_idle_timeout: std::time::Duration,
    /// RFC 9113 §5.1.2 back-pressure: count of stream refusals
    /// (REFUSED_STREAM emitted via [`Self::refuse_stream_and_discard`]) within
    /// the current back-pressure window. When the count exceeds
    /// [`BACKPRESSURE_REFUSAL_THRESHOLD`] inside one
    /// [`BACKPRESSURE_WINDOW_DURATION`] we halve the advertised
    /// `SETTINGS_MAX_CONCURRENT_STREAMS` to signal the peer to slow down.
    refuse_count_window: u32,
    /// Start timestamp for the current back-pressure window.
    refuse_window_start: Instant,
    /// Set once we have halved `local_settings.settings_max_concurrent_streams`
    /// in response to a refusal burst. Prevents the cap from collapsing to 0
    /// on sustained abuse — a single halving per connection is sufficient to
    /// signal back-pressure; further bursts trigger `EnhanceYourCalm`.
    mcs_backpressure_applied: bool,
}
1818impl<Front: SocketHandler> std::fmt::Debug for ConnectionH2<Front> {
1819    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1820        f.debug_struct("ConnectionH2")
1821            .field("position", &self.position)
1822            .field("state", &self.state)
1823            .field("expect", &self.expect_read)
1824            .field("readiness", &self.readiness)
1825            .field("local_settings", &self.local_settings)
1826            .field("peer_settings", &self.peer_settings)
1827            .field("socket", &self.socket.socket_ref())
1828            .field("streams", &self.streams)
1829            .field("zero", &self.zero.storage.meter(20))
1830            .field("window", &self.flow_control.window)
1831            .field("total_rst_streams_queued", &self.total_rst_streams_queued)
1832            .finish()
1833    }
1834}
1835
1836/// Symmetric tear-down for the three `h2.connection.*` aggregate gauges:
1837/// whatever positive contribution this connection made via
1838/// [`ConnectionH2::gauge_connection_state`] is subtracted back out when the
1839/// connection is dropped.
1840///
1841/// Using `Drop` (rather than wiring decrements into every close path —
1842/// `graceful_goaway`, `force_disconnect`, `handle_goaway_frame`, `Mux::close`,
1843/// stream-id exhaustion, panic-unwind) is what guarantees the gauge is
1844/// arithmetically symmetric regardless of which path teardown took. Past
1845/// underflow incidents (commits a650ad69, d2f01ed4) have all been
1846/// missing-decrement bugs that `Drop` makes structurally impossible.
1847impl<Front: SocketHandler> Drop for ConnectionH2<Front> {
1848    fn drop(&mut self) {
1849        self.release_connection_gauges();
1850    }
1851}
1852
/// Target of a pending read or write expectation (see
/// `ConnectionH2::expect_read` / `ConnectionH2::expect_write`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum H2StreamId {
    /// NOTE(review): presumably the connection-level stream 0 backed by
    /// `ConnectionH2::zero` — confirm at the use sites.
    Zero,
    /// A regular stream, identified by its wire stream ID (`id`) and its
    /// [`GlobalStreamId`] (`gid`).
    Other { id: StreamId, gid: GlobalStreamId },
}
1858
1859impl<Front: SocketHandler> ConnectionH2<Front> {
1860    fn frontend_hung_up_while_draining(&self) -> bool {
1861        matches!(self.position, Position::Server)
1862            && self.drain.draining
1863            && (self.readiness.event.is_hup() || self.readiness.event.is_error())
1864    }
1865
1866    /// Once the final GOAWAY has been queued and all streams/control frames are
1867    /// gone, a peer-side HUP/ERR means any remaining rustls backlog is no
1868    /// longer deliverable. Waiting on `socket_wants_write()` in that state can
1869    /// deadlock shutdown forever because GOAWAY disables further frame reads.
1870    fn peer_gone_after_final_goaway(&self) -> bool {
1871        self.frontend_hung_up_while_draining()
1872            && matches!(self.state, H2State::GoAway | H2State::Error)
1873            && self.streams.is_empty()
1874            && self.expect_write.is_none()
1875            && self.zero.storage.is_empty()
1876    }
1877
1878    /// Shared constructor for both server and client H2 connections.
1879    ///
1880    /// Differences between server and client are captured by the caller-provided
1881    /// `position`, `expect_read`, and `readiness_interest` parameters.
1882    #[allow(clippy::too_many_arguments)]
1883    pub(super) fn new(
1884        session_ulid: Ulid,
1885        socket: Front,
1886        position: super::Position,
1887        pool: std::rc::Weak<std::cell::RefCell<crate::pool::Pool>>,
1888        flood_config: H2FloodConfig,
1889        connection_config: H2ConnectionConfig,
1890        stream_idle_timeout: std::time::Duration,
1891        graceful_shutdown_deadline: Option<std::time::Duration>,
1892        timeout_container: crate::timer::TimeoutContainer,
1893        expect_read: Option<(H2StreamId, usize)>,
1894        readiness_interest: sozu_command::ready::Ready,
1895    ) -> Option<Self> {
1896        let buffer = pool
1897            .upgrade()
1898            .and_then(|pool| pool.borrow_mut().checkout())?;
1899        let local_settings = H2Settings {
1900            settings_max_concurrent_streams: connection_config.max_concurrent_streams,
1901            ..H2Settings::default()
1902        };
1903        let mut decoder = loona_hpack::Decoder::new();
1904        // RFC 7541 §4.2: enforce SETTINGS_HEADER_TABLE_SIZE as the upper bound
1905        // for dynamic table size updates from the peer
1906        decoder.set_max_allowed_table_size(local_settings.settings_header_table_size as usize);
1907        Some(ConnectionH2 {
1908            session_ulid,
1909            decoder,
1910            encoder: loona_hpack::Encoder::new(),
1911            expect_read,
1912            expect_write: None,
1913            last_stream_id: 0,
1914            local_settings,
1915            peer_settings: H2Settings::default(),
1916            position,
1917            prioriser: Prioriser::default(),
1918            readiness: crate::Readiness {
1919                interest: readiness_interest,
1920                event: Ready::EMPTY,
1921            },
1922            socket,
1923            state: H2State::ClientPreface,
1924            streams: std::collections::HashMap::with_capacity(8),
1925            timeout_container,
1926            flow_control: H2FlowControl {
1927                window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
1928                received_bytes_since_update: 0,
1929                pending_window_updates: HashMap::new(),
1930            },
1931            highest_peer_stream_id: 0,
1932            pending_table_size_update: None,
1933            converter_buf: Vec::new(),
1934            lowercase_buf: Vec::new(),
1935            cookie_buf: Vec::new(),
1936            drain: H2DrainState {
1937                draining: false,
1938                peer_last_stream_id: None,
1939                started_at: None,
1940                graceful_shutdown_deadline,
1941            },
1942            zero: kawa::Kawa::new(kawa::Kind::Request, kawa::Buffer::new(buffer)),
1943            bytes: H2ByteAccounting {
1944                zero_bytes_read: 0,
1945                overhead_bin: 0,
1946                overhead_bout: 0,
1947            },
1948            flood_detector: H2FloodDetector::new(flood_config),
1949            settings_sent_at: None,
1950            pending_rst_streams: Vec::new(),
1951            rst_sent: std::collections::HashSet::new(),
1952            total_rst_streams_queued: 0,
1953            priorities_buf: Vec::new(),
1954            close_notify_sent: false,
1955            max_pending_window_updates: 1 + connection_config.max_concurrent_streams as usize * 4,
1956            connection_config,
1957            last_gauge_snapshot: None,
1958            stream_last_activity_at: HashMap::new(),
1959            stream_fc_stalled_since: HashMap::new(),
1960            stream_fc_stalled_progress: HashMap::new(),
1961            stream_idle_timeout,
1962            refuse_count_window: 0,
1963            refuse_window_start: Instant::now(),
1964            mcs_backpressure_applied: false,
1965        })
1966    }
1967
1968    /// Start TLS close_notify on the frontend and keep the session alive until
1969    /// rustls has flushed the generated records.
1970    pub fn initiate_close_notify(&mut self) -> bool {
1971        if !self.position.is_server()
1972            || matches!(
1973                self.state,
1974                H2State::ClientPreface | H2State::ClientSettings | H2State::ServerSettings
1975            )
1976        {
1977            return false;
1978        }
1979        if !self.close_notify_sent {
1980            trace!("{} H2 initiating CLOSE_NOTIFY", log_context!(self));
1981            self.socket.socket_close();
1982            self.close_notify_sent = true;
1983        }
1984        if self.socket.socket_wants_write() {
1985            self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
1986            self.ensure_tls_flushed();
1987            true
1988        } else {
1989            false
1990        }
1991    }
1992
1993    fn expect_header(&mut self) {
1994        self.state = H2State::Header;
1995        self.expect_read = Some((H2StreamId::Zero, 9));
1996    }
1997
1998    /// Process the `H2State::Header` state: parse a 9-byte frame header from
1999    /// `self.zero`, validate the stream, create new streams if needed, and
2000    /// transition to `H2State::Frame` for the payload.
2001    ///
2002    /// Returns `MuxResult` — the caller should propagate the result directly.
2003    fn handle_header_state<L>(&mut self, context: &mut Context<L>) -> MuxResult
2004    where
2005        L: ListenerHandler + L7ListenerHandler,
2006    {
2007        let i = self.zero.storage.data();
2008        trace!("{}   header: {:?}", log_context!(self), i);
2009        match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2010            Ok((_, header)) => {
2011                trace!("{} {:#?}", log_context!(self), header);
2012                self.zero.storage.clear();
2013                let stream_id = header.stream_id;
2014                // RFC 9113 §6.10: CONTINUATION frames MUST be preceded by a
2015                // HEADERS or PUSH_PROMISE frame without END_HEADERS. When we
2016                // reach `handle_header_state`, we are between frames and no
2017                // header block is in progress (otherwise the state would be
2018                // `H2State::ContinuationHeader`). A CONTINUATION frame arriving
2019                // here is therefore standalone and MUST be treated as a
2020                // connection error of type PROTOCOL_ERROR.
2021                if header.frame_type == FrameType::Continuation {
2022                    error!(
2023                        "{} standalone CONTINUATION frame on stream {} without preceding HEADERS",
2024                        log_context!(self),
2025                        stream_id
2026                    );
2027                    return self.goaway(H2Error::ProtocolError);
2028                }
2029                // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
2030                // Route unknown frames (and any stream_id == 0 control frame)
2031                // through stream 0 (the connection-level buffer) so
2032                // `handle_frame` can drop them without touching stream state.
2033                let read_stream = if stream_id == 0
2034                    || matches!(header.frame_type, FrameType::Unknown(_))
2035                {
2036                    H2StreamId::Zero
2037                } else if let Some(global_stream_id) = self.streams.get(&stream_id) {
2038                    let allowed_on_half_closed = header.frame_type == FrameType::WindowUpdate
2039                        || header.frame_type == FrameType::Priority
2040                        || header.frame_type == FrameType::RstStream;
2041                    let stream = &context.streams[*global_stream_id];
2042                    // Use the position-aware end_of_stream flag:
2043                    // - Server reads from front (client requests)
2044                    // - Client reads from back (backend responses)
2045                    let received_eos = if self.position.is_server() {
2046                        stream.front_received_end_of_stream
2047                    } else {
2048                        stream.back_received_end_of_stream
2049                    };
2050                    trace!(
2051                        "{} REQUESTING EXISTING STREAM {}: {}/{:?}",
2052                        log_context!(self),
2053                        stream_id,
2054                        received_eos,
2055                        stream.state
2056                    );
2057                    if !allowed_on_half_closed && (received_eos || !stream.state.is_open()) {
2058                        error!(
2059                            "{} CANNOT RECEIVE {:?} ON THIS STREAM {:?}",
2060                            log_context!(self),
2061                            header.frame_type,
2062                            stream.state
2063                        );
2064                        return self.goaway(H2Error::StreamClosed);
2065                    }
2066                    // RFC 9113 §8.1: a HEADERS frame received in the body
2067                    // phase is a trailer block and MUST carry END_STREAM. This
2068                    // closes the request-smuggling primitive where a peer sends
2069                    // HEADERS, DATA, HEADERS (no END_STREAM) to chain header
2070                    // blocks on the same stream ID.
2071                    //
2072                    // Discriminate from the read-side Kawa parsing phase rather
2073                    // than stream existence: on Position::Client the stream is
2074                    // created when we send the request to the backend, so the
2075                    // initial backend response HEADERS legitimately arrives on
2076                    // an existing stream. Similarly, 1xx→final transitions on
2077                    // either side may yield multiple HEADERS frames before the
2078                    // body begins (kawa clears back to initial / terminated on
2079                    // 1xx; neither is main_phase). Only HEADERS arriving once
2080                    // the read side has transitioned to Body/Chunks parsing —
2081                    // i.e. after headers were fully consumed and body framing
2082                    // is in progress — may be a trailer.
2083                    let read_in_body = if self.position.is_server() {
2084                        stream.front.is_main_phase()
2085                    } else {
2086                        stream.back.is_main_phase()
2087                    };
2088                    if header.frame_type == FrameType::Headers
2089                        && read_in_body
2090                        && header.flags & parser::FLAG_END_STREAM == 0
2091                    {
2092                        error!(
2093                            "{} HEADERS without END_STREAM on open stream {} in body phase: trailers MUST carry END_STREAM",
2094                            log_context!(self),
2095                            stream_id
2096                        );
2097                        return self.goaway(H2Error::ProtocolError);
2098                    }
2099                    if header.frame_type == FrameType::Data {
2100                        H2StreamId::Other {
2101                            id: stream_id,
2102                            gid: *global_stream_id,
2103                        }
2104                    } else {
2105                        H2StreamId::Zero
2106                    }
2107                } else {
2108                    // RFC 9113 §5.1.1: stream identifiers MUST be strictly
2109                    // increasing. Tightened from `>=` to `>` so that a peer
2110                    // cannot re-use `self.last_stream_id` (which would
2111                    // conflict with our own server-pushed streams if we
2112                    // ever enable push in the future). For the first
2113                    // request on a fresh connection `last_stream_id == 0`
2114                    // and any client-initiated odd stream still passes.
2115                    if header.frame_type == FrameType::Headers
2116                        && self.position.is_server()
2117                        && stream_id & 1 == 1
2118                        && stream_id > self.last_stream_id
2119                    {
2120                        // RFC 9113 §6.8: after sending a GOAWAY, the proxy
2121                        // MUST NOT accept new streams.
2122                        // `graceful_goaway` sets `drain.draining = true`
2123                        // and sends an initial GOAWAY with last_stream_id =
2124                        // STREAM_ID_MAX (so in-flight requests are still
2125                        // accepted), but the contract for *new* peer-
2126                        // initiated streams is that they must be refused.
2127                        // Without this check, a peer racing the drain
2128                        // window could open arbitrary new streams between
2129                        // the initial and final GOAWAY emission.
2130                        if self.drain.draining {
2131                            if stream_id > self.highest_peer_stream_id {
2132                                self.highest_peer_stream_id = stream_id;
2133                            }
2134                            return self.refuse_stream_and_discard(
2135                                stream_id,
2136                                H2Error::RefusedStream,
2137                                header.payload_len,
2138                            );
2139                        }
2140                        if self.streams.len()
2141                            >= self.local_settings.settings_max_concurrent_streams as usize
2142                        {
2143                            error!(
2144                                "{} MAX CONCURRENT STREAMS: limit={}, current={}",
2145                                log_context!(self),
2146                                self.local_settings.settings_max_concurrent_streams,
2147                                self.streams.len()
2148                            );
2149                            // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
2150                            // queueing RST_STREAM so GOAWAY reports the correct
2151                            // last_stream_id if the connection closes later.
2152                            if stream_id > self.highest_peer_stream_id {
2153                                self.highest_peer_stream_id = stream_id;
2154                            }
2155                            return self.refuse_stream_and_discard(
2156                                stream_id,
2157                                H2Error::RefusedStream,
2158                                header.payload_len,
2159                            );
2160                        }
2161                        match self.create_stream(stream_id, context) {
2162                            Some(_) => {}
2163                            None => {
2164                                // Buffer pool exhaustion is transient — refuse
2165                                // this stream but keep the connection alive so
2166                                // existing streams can complete and free buffers.
2167                                error!(
2168                                    "{} Could not create stream {}: buffer pool exhausted",
2169                                    log_context!(self),
2170                                    stream_id
2171                                );
2172                                // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
2173                                // queueing RST_STREAM so GOAWAY reports the correct
2174                                // last_stream_id if the connection closes later.
2175                                if stream_id > self.highest_peer_stream_id {
2176                                    self.highest_peer_stream_id = stream_id;
2177                                }
2178                                return self.refuse_stream_and_discard(
2179                                    stream_id,
2180                                    H2Error::RefusedStream,
2181                                    header.payload_len,
2182                                );
2183                            }
2184                        }
2185                    } else if header.frame_type != FrameType::Priority {
2186                        // Distinguish closed vs idle: check whether the stream
2187                        // was previously opened. For Server position, compare
2188                        // against highest_peer_stream_id (client-initiated).
2189                        // For Client position, compare against last_stream_id
2190                        // (our own initiated streams) since the peer never
2191                        // initiates streams on a backend connection.
2192                        let is_closed_stream = if self.position.is_server() {
2193                            header.stream_id <= self.highest_peer_stream_id
2194                        } else {
2195                            header.stream_id < self.last_stream_id
2196                        };
2197                        if is_closed_stream {
2198                            match header.frame_type {
2199                                FrameType::RstStream | FrameType::WindowUpdate => {
2200                                    // RFC 9113 §5.1: RST_STREAM and WINDOW_UPDATE
2201                                    // on a closed stream can arrive due to race
2202                                    // conditions and should be consumed/discarded.
2203                                    debug!(
2204                                        "{} Ignoring {:?} on closed stream {}",
2205                                        log_context!(self),
2206                                        header.frame_type,
2207                                        header.stream_id
2208                                    );
2209                                    self.flood_detector.glitch_count += 1;
2210                                    check_flood_or_return!(self);
2211                                }
2212                                FrameType::Data => {
2213                                    // RFC 9113 §5.1: DATA on a closed stream is a
2214                                    // stream error of type STREAM_CLOSED. Queue
2215                                    // RST_STREAM (not GOAWAY) to preserve the
2216                                    // connection for other streams. The payload is
2217                                    // still routed through stream 0 so handle_frame
2218                                    // can do connection-level flow control accounting.
2219                                    debug!(
2220                                        "{} DATA on closed stream {}, sending RST_STREAM(STREAM_CLOSED)",
2221                                        log_context!(self),
2222                                        header.stream_id
2223                                    );
2224                                    self.flood_detector.glitch_count += 1;
2225                                    check_flood_or_return!(self);
2226                                    if let Some(result) =
2227                                        self.enqueue_rst(header.stream_id, H2Error::StreamClosed)
2228                                    {
2229                                        return result;
2230                                    }
2231                                }
2232                                _ => {
2233                                    // RFC 9113 §5.1: HEADERS or other frames on a
2234                                    // closed stream → connection error STREAM_CLOSED.
2235                                    error!(
2236                                        "{} Received {:?} on closed stream {}, sending GOAWAY(STREAM_CLOSED)",
2237                                        log_context!(self),
2238                                        header.frame_type,
2239                                        header.stream_id
2240                                    );
2241                                    return self.goaway(H2Error::StreamClosed);
2242                                }
2243                            }
2244                        } else {
2245                            error!(
2246                                "{} Received {:?} on idle stream {}, sending GOAWAY(PROTOCOL_ERROR)",
2247                                log_context!(self),
2248                                header.frame_type,
2249                                header.stream_id
2250                            );
2251                            return self.goaway(H2Error::ProtocolError);
2252                        }
2253                    }
2254                    H2StreamId::Zero
2255                };
2256                trace!(
2257                    "{} {} {:?} {:#?}",
2258                    log_context!(self),
2259                    header.stream_id,
2260                    stream_id,
2261                    self.streams
2262                );
2263                self.expect_read = Some((read_stream, header.payload_len as usize));
2264                self.state = H2State::Frame(header);
2265            }
2266            Err(error) => {
2267                let error = error_nom_to_h2(error);
2268                error!("{} COULD NOT PARSE FRAME HEADER", log_context!(self));
2269                return self.goaway(error);
2270            }
2271        };
2272        MuxResult::Continue
2273    }
2274
2275    /// Process the `H2State::ContinuationHeader` state: parse a CONTINUATION
2276    /// frame header from `self.zero`, validate stream ID continuity, track
2277    /// flood detection counters, and transition to `ContinuationFrame`.
2278    ///
2279    /// The `headers` parameter is the accumulated HEADERS context from the
2280    /// initial HEADERS frame (cloned from the state enum to avoid borrow
2281    /// conflicts).
2282    fn handle_continuation_header_state(&mut self, headers: &Headers) -> MuxResult {
2283        let i = self.zero.storage.unparsed_data();
2284        trace!("{}   continuation header: {:?}", log_context!(self), i);
2285        match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2286            Ok((
2287                _,
2288                FrameHeader {
2289                    payload_len,
2290                    frame_type: FrameType::Continuation,
2291                    flags,
2292                    stream_id,
2293                },
2294            )) => {
2295                if self.zero.storage.end < 9 {
2296                    error!(
2297                        "{} CONTINUATION header: storage.end ({}) too small to remove frame header",
2298                        log_context!(self),
2299                        self.zero.storage.end
2300                    );
2301                    return self.goaway(H2Error::InternalError);
2302                }
2303                self.zero.storage.end -= 9;
2304                if stream_id != headers.stream_id {
2305                    error!(
2306                        "{} CONTINUATION stream_id {} does not match HEADERS stream_id {}",
2307                        log_context!(self),
2308                        stream_id,
2309                        headers.stream_id
2310                    );
2311                    return self.goaway(H2Error::ProtocolError);
2312                }
2313                // CVE-2024-27316: track CONTINUATION frame count and accumulated size
2314                let cont_count_before = self.flood_detector.continuation_count;
2315                let acc_size_before = self.flood_detector.accumulated_header_size;
2316                self.flood_detector.continuation_count += 1;
2317                self.flood_detector.accumulated_header_size = self
2318                    .flood_detector
2319                    .accumulated_header_size
2320                    .saturating_add(payload_len);
2321                // Per-block CONTINUATION accounting must grow monotonically
2322                // within a header block: each frame bumps the count by one and
2323                // the accumulated size by the frame's payload (never shrinks
2324                // mid-block). `reset_continuation` is the only thing allowed to
2325                // zero these — and only once the block is complete.
2326                debug_assert_eq!(
2327                    self.flood_detector.continuation_count,
2328                    cont_count_before + 1,
2329                    "CONTINUATION per-block counter must advance by one per frame"
2330                );
2331                debug_assert!(
2332                    self.flood_detector.accumulated_header_size >= acc_size_before,
2333                    "accumulated header size must not shrink within a header block"
2334                );
2335                check_flood_or_return!(self);
2336                // RFC 9113 §10.5.1: reject header blocks that cannot be
2337                // buffered. Previously we silently removed READABLE interest
2338                // when amount > available_space, stalling the connection.
2339                // If the payload still fits in our zero buffer we can refuse
2340                // just this stream (RST_STREAM + drain); if not, the
2341                // connection can no longer decode header blocks safely and we
2342                // escalate to GOAWAY(EnhanceYourCalm).
2343                if self.flood_detector.accumulated_header_size
2344                    > self.flood_detector.config.max_header_list_size
2345                {
2346                    error!(
2347                        "{} CONTINUATION accumulated header size {} exceeds {}",
2348                        log_context!(self),
2349                        self.flood_detector.accumulated_header_size,
2350                        self.flood_detector.config.max_header_list_size
2351                    );
2352                    if (payload_len as usize) > self.zero.storage.available_space() {
2353                        return self.goaway(H2Error::EnhanceYourCalm);
2354                    }
2355                    // Remove the already-created stream slot before refusing,
2356                    // so it does not leak against MAX_CONCURRENT_STREAMS. Route
2357                    // through `remove_dead_stream` so the expect_write/read
2358                    // invariant (§LIFECYCLE.md 5.4) holds on this path too.
2359                    if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
2360                        self.remove_dead_stream(stream_id, global_stream_id);
2361                    }
2362                    return self.refuse_stream_and_discard(
2363                        stream_id,
2364                        H2Error::RefusedStream,
2365                        payload_len,
2366                    );
2367                }
2368                if (payload_len as usize) > self.zero.storage.available_space() {
2369                    error!(
2370                        "{} CONTINUATION payload {} exceeds buffer space {}",
2371                        log_context!(self),
2372                        payload_len,
2373                        self.zero.storage.available_space()
2374                    );
2375                    return self.goaway(H2Error::EnhanceYourCalm);
2376                }
2377                self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2378                let mut headers = headers.clone();
2379                headers.end_headers = flags & parser::FLAG_END_HEADERS != 0;
2380                headers.header_block_fragment.len = headers
2381                    .header_block_fragment
2382                    .len
2383                    .saturating_add(payload_len);
2384                self.state = H2State::ContinuationFrame(headers);
2385            }
2386            Err(error) => {
2387                let error = error_nom_to_h2(error);
2388                error!("{} COULD NOT PARSE CONTINUATION HEADER", log_context!(self));
2389                return self.goaway(error);
2390            }
2391            other => {
2392                error!(
2393                    "{} UNEXPECTED {:?} WHILE PARSING CONTINUATION HEADER",
2394                    log_context!(self),
2395                    other
2396                );
2397                return self.goaway(H2Error::ProtocolError);
2398            }
2399        };
2400        MuxResult::Continue
2401    }
2402
2403    pub fn readable<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2404    where
2405        E: Endpoint,
2406        L: ListenerHandler + L7ListenerHandler,
2407    {
2408        self.prune_inactive_streams_while_closing(context);
2409        // Pass 4 Medium #3: per-stream idle guard. Slow-multiplex Slowloris
2410        // sends one byte or a control frame per stream just often enough to
2411        // reset the connection-level timer; per-stream deadlines catch it.
2412        self.cancel_timed_out_streams(context, &mut endpoint);
2413
2414        // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
2415        if let Some(sent_at) = self.settings_sent_at {
2416            if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
2417                warn!(
2418                    "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
2419                    log_context!(self),
2420                    SETTINGS_ACK_TIMEOUT
2421                );
2422                return self.goaway(H2Error::SettingsTimeout);
2423            }
2424        }
2425
2426        // Don't reset the timeout unconditionally here. Only application data
2427        // (DATA/HEADERS frames) should reset the timeout. H2 control frames
2428        // (PING, WINDOW_UPDATE, SETTINGS) must NOT reset it, otherwise a peer
2429        // sending periodic PINGs prevents timeout detection on stuck sessions.
2430        // The timeout is reset:
2431        // - Below, when reading DATA payload (H2StreamId::Other)
2432        // - In handle_frame(), when processing HEADERS frames
2433        let (stream_id, kawa) = if let Some((stream_id, amount)) = self.expect_read {
2434            let (kawa, did) = match stream_id {
2435                H2StreamId::Zero => (&mut self.zero, usize::MAX),
2436                H2StreamId::Other {
2437                    gid: global_stream_id,
2438                    ..
2439                } => {
2440                    // Reading DATA frame payload for an application stream.
2441                    // This is real application activity — reset the timeout.
2442                    self.timeout_container.reset();
2443                    (
2444                        context.streams[global_stream_id]
2445                            .split(&self.position)
2446                            .rbuffer,
2447                        global_stream_id,
2448                    )
2449                }
2450            };
2451            trace!(
2452                "{} {:?}({:?}, {})",
2453                log_context!(self),
2454                self.state,
2455                stream_id,
2456                amount
2457            );
2458            if amount > 0 {
2459                if amount > kawa.storage.available_space() {
2460                    self.readiness.interest.remove(Ready::READABLE);
2461                    return MuxResult::Continue;
2462                }
2463                let (size, status) = self.socket.socket_read(&mut kawa.storage.space()[..amount]);
2464                context.debug.push(DebugEvent::SocketIO(0, did, size));
2465                kawa.storage.fill(size);
2466                self.position.count_bytes_in_counter(size);
2467                self.bytes.zero_bytes_read += size;
2468                if update_readiness_after_read(size, status, &mut self.readiness) {
2469                    if matches!(self.position, Position::Server)
2470                        && self.drain.draining
2471                        && matches!(status, SocketResult::Closed | SocketResult::Error)
2472                    {
2473                        // During graceful drain, a frontend EOF/HUP means no
2474                        // further frame headers or payload bytes can arrive.
2475                        // Keeping expect_read here strands the connection in
2476                        // Header/Frame forever even after the peer is gone.
2477                        self.expect_read = None;
2478                    }
2479                    return MuxResult::Continue;
2480                } else if size == amount {
2481                    self.expect_read = None;
2482                } else {
2483                    self.expect_read = Some((stream_id, amount - size));
2484                    if let (H2State::ClientPreface, Position::Server) =
2485                        (&self.state, &self.position)
2486                    {
2487                        let i = kawa.storage.data();
2488                        if !b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n".starts_with(i) {
2489                            debug!("{} EARLY INVALID PREFACE: {:?}", log_context!(self), i);
2490                            return self.force_disconnect();
2491                        }
2492                    }
2493                    return MuxResult::Continue;
2494                }
2495            } else {
2496                self.expect_read = None;
2497            }
2498            (stream_id, kawa)
2499        } else {
2500            self.readiness.event.remove(Ready::READABLE);
2501            return MuxResult::Continue;
2502        };
2503        match (&self.state, &self.position) {
2504            (H2State::Error, _)
2505            | (H2State::GoAway, _)
2506            | (H2State::ServerSettings, Position::Server)
2507            | (H2State::ClientPreface, Position::Client(..))
2508            | (H2State::ClientSettings, Position::Client(..)) => {
2509                error!(
2510                    "{} Unexpected combination: (Readable, {:?}, {:?})",
2511                    log_context!(self),
2512                    self.state,
2513                    self.position
2514                );
2515                return self.force_disconnect();
2516            }
2517            (H2State::Discard, _) => {
2518                let _i = kawa.storage.data();
2519                trace!("{} DISCARDING: {:?}", log_context!(self), _i);
2520                kawa.storage.clear();
2521                self.attribute_bytes_to_overhead();
2522                self.expect_header();
2523            }
2524            (H2State::ClientPreface, Position::Server) => {
2525                let i = kawa.storage.data();
2526                let i = match parser::preface(i) {
2527                    Ok((i, _)) => i,
2528                    Err(_) => return self.force_disconnect(),
2529                };
2530                match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2531                    Ok((
2532                        _,
2533                        FrameHeader {
2534                            payload_len,
2535                            frame_type: FrameType::Settings,
2536                            flags: 0,
2537                            stream_id: 0,
2538                        },
2539                    )) => {
2540                        kawa.storage.clear();
2541                        self.state = H2State::ClientSettings;
2542                        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2543                    }
2544                    _ => return self.force_disconnect(),
2545                };
2546            }
2547            (H2State::ClientSettings, Position::Server) => {
2548                let i = kawa.storage.data();
2549                let settings = match parser::settings_frame(
2550                    i,
2551                    &FrameHeader {
2552                        payload_len: i.len() as u32,
2553                        frame_type: FrameType::Settings,
2554                        flags: 0,
2555                        stream_id: 0,
2556                    },
2557                ) {
2558                    Ok((_, settings)) => {
2559                        kawa.storage.clear();
2560                        settings
2561                    }
2562                    Err(_) => return self.force_disconnect(),
2563                };
2564                let kawa = &mut self.zero;
2565                match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
2566                    Ok((_, size)) => {
2567                        kawa.storage.fill(size);
2568                        incr!(names::h2::FRAMES_TX_SETTINGS);
2569                        // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
2570                        self.settings_sent_at = Some(Instant::now());
2571                    }
2572                    Err(error) => {
2573                        error!(
2574                            "{} Could not serialize SettingsFrame: {:?}",
2575                            log_context!(self),
2576                            error
2577                        );
2578                        return self.force_disconnect();
2579                    }
2580                };
2581
2582                self.state = H2State::ServerSettings;
2583                self.expect_write = Some(H2StreamId::Zero);
2584                self.readiness.signal_pending_write();
2585                return self.handle_frame(settings, 0, context, endpoint);
2586            }
2587            (H2State::ServerSettings, Position::Client(..)) => {
2588                let i = kawa.storage.data();
2589                match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2590                    Ok((
2591                        _,
2592                        header @ FrameHeader {
2593                            payload_len,
2594                            frame_type: FrameType::Settings,
2595                            flags: 0,
2596                            stream_id: 0,
2597                        },
2598                    )) => {
2599                        kawa.storage.clear();
2600                        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2601                        self.state = H2State::Frame(header)
2602                    }
2603                    _ => return self.force_disconnect(),
2604                };
2605            }
2606            (H2State::Header, _) => {
2607                return self.handle_header_state(context);
2608            }
2609            (H2State::ContinuationHeader(headers), _) => {
2610                let headers = headers.clone();
2611                return self.handle_continuation_header_state(&headers);
2612            }
2613            (H2State::Frame(header), _) => {
2614                let i = kawa.storage.unparsed_data();
2615                trace!("{}   data: {:?}", log_context!(self), i);
2616                let wire_payload_len = header.payload_len;
2617                let frame = match parser::frame_body(i, header) {
2618                    Ok((_, frame)) => frame,
2619                    Err(error) => {
2620                        let error = error_nom_to_h2(error);
2621                        error!("{} COULD NOT PARSE FRAME BODY", log_context!(self));
2622                        return self.goaway(error);
2623                    }
2624                };
2625                if let H2StreamId::Zero = stream_id {
2626                    if header.frame_type == FrameType::Headers {
2627                        kawa.storage.head = kawa.storage.end;
2628                    } else {
2629                        kawa.storage.end = kawa.storage.head;
2630                    }
2631                }
2632                self.expect_header();
2633                return self.handle_frame(frame, wire_payload_len, context, endpoint);
2634            }
2635            (H2State::ContinuationFrame(headers), _) => {
2636                kawa.storage.head = kawa.storage.end;
2637                let i = kawa.storage.data();
2638                trace!("{}   data: {:?}", log_context!(self), i);
2639                let headers = headers.clone();
2640                self.expect_header();
2641                return self.handle_frame(Frame::Headers(headers), 0, context, endpoint);
2642            }
2643        }
2644        MuxResult::Continue
2645    }
2646
2647    /// Update the H2 connection-level *aggregate* gauges with this connection's
2648    /// current contribution, expressed as a signed delta against the last
2649    /// snapshot we emitted.
2650    ///
2651    /// The three metrics are emitted via [`gauge_add!`] (lifecycle deltas) so
2652    /// that the dashboard sees the **sum across all live H2 connections**:
2653    ///
2654    /// - `h2.connection.window_bytes` — sum of available connection-level
2655    ///   send-window bytes. Negative per-connection windows clamp to 0 so the
2656    ///   aggregate represents only available capacity, not deficit.
2657    /// - `h2.connection.active_streams` — sum of in-flight streams across
2658    ///   every H2 connection.
2659    /// - `h2.connection.pending_window_updates` — sum of queued (un-flushed)
2660    ///   per-stream WINDOW_UPDATE entries across every H2 connection.
2661    ///
2662    /// Called from the write hot path; emits nothing when the snapshot is
2663    /// unchanged so the steady state stays cheap. The paired decrement for
2664    /// every increment is provided by [`Drop`], which subtracts the final
2665    /// snapshot when the connection is dropped — keeping the aggregate
2666    /// arithmetically symmetric independent of which close path runs
2667    /// (`graceful_goaway`, `force_disconnect`, `handle_goaway_frame`,
2668    /// `Mux::close`, panic-unwind, …).
2669    fn gauge_connection_state(&mut self) {
2670        let snapshot = (
2671            self.flow_control.window.max(0) as usize,
2672            self.streams.len(),
2673            self.flow_control.pending_window_updates.len(),
2674        );
2675        if self.last_gauge_snapshot == Some(snapshot) {
2676            return;
2677        }
2678        let prev = self.last_gauge_snapshot.unwrap_or((0, 0, 0));
2679        // Diff in i64 — usize cannot represent the negative side of the delta.
2680        let dw = snapshot.0 as i64 - prev.0 as i64;
2681        let ds = snapshot.1 as i64 - prev.1 as i64;
2682        let du = snapshot.2 as i64 - prev.2 as i64;
2683        if dw != 0 {
2684            gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, dw);
2685        }
2686        if ds != 0 {
2687            gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, ds);
2688        }
2689        if du != 0 {
2690            gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, du);
2691        }
2692        self.last_gauge_snapshot = Some(snapshot);
2693    }
2694
2695    /// Subtract this connection's contribution from the three aggregate
2696    /// `h2.connection.*` gauges. Idempotent: clears `last_gauge_snapshot` so a
2697    /// second call (or a [`Drop`] on top of an explicit reset) is a no-op.
2698    ///
2699    /// Pairs with every prior call to [`Self::gauge_connection_state`]; called
2700    /// from [`Drop`] so the symmetry is guaranteed regardless of the close
2701    /// path.
2702    fn release_connection_gauges(&mut self) {
2703        if let Some((w, s, u)) = self.last_gauge_snapshot.take() {
2704            if w != 0 {
2705                gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, -(w as i64));
2706            }
2707            if s != 0 {
2708                gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, -(s as i64));
2709            }
2710            if u != 0 {
2711                gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, -(u as i64));
2712            }
2713        }
2714    }
2715
2716    /// Write application data (request/response bodies, headers) across all
2717    /// active streams, respecting priority ordering and flow control.
2718    ///
2719    /// This is the main data-plane write path: it resumes any partially-written
2720    /// stream, prepares new frames via the H2 block converter, flushes them to
2721    /// the socket, and recycles completed streams.
2722    ///
2723    /// NOTE: The priority iteration loop and converter setup remain inline here
2724    /// because the converter borrows `self.encoder`, preventing further
2725    /// decomposition into `&mut self` methods within the loop body.
2726    fn write_streams<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2727    where
2728        E: Endpoint,
2729        L: ListenerHandler + L7ListenerHandler,
2730    {
2731        self.timeout_container.reset();
2732        // Pre-compute byte totals for proportional overhead distribution.
2733        let byte_totals = self.compute_stream_byte_totals(context);
2734        let mut io_slices: Vec<IoSlice<'static>> = Vec::new();
2735
2736        if let Some(
2737            write_stream @ H2StreamId::Other {
2738                id: stream_id,
2739                gid: global_stream_id,
2740            },
2741        ) = self.expect_write
2742        {
2743            let stream = &mut context.streams[global_stream_id];
2744            let stream_state = stream.state;
2745            let parts = stream.split(&self.position);
2746            let kawa = parts.wbuffer;
2747            // Resume path: if the same stream is parked waiting for buffer
2748            // space (expect_read matches write_stream), pass the amount so
2749            // flush_stream_out can re-enable READABLE as soon as we drain.
2750            let cross_read_amount = match self.expect_read {
2751                Some((read_stream, amount)) if write_stream == read_stream => Some(amount),
2752                _ => None,
2753            };
2754            let mut resume_bytes: usize = 0;
2755            let outcome = Self::flush_stream_out(
2756                &mut self.socket,
2757                kawa,
2758                parts.metrics,
2759                &self.position,
2760                &mut self.readiness,
2761                &mut context.debug,
2762                2,
2763                global_stream_id,
2764                None,
2765                cross_read_amount,
2766                &mut io_slices,
2767                Some(&mut resume_bytes),
2768            );
2769            // Refresh the per-stream idle timer when outbound bytes move: a
2770            // large response delivered at low bandwidth is "active", not idle,
2771            // even when the peer sends no inbound frames.
2772            if resume_bytes > 0 {
2773                if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2774                    *t = Instant::now();
2775                }
2776                // Clear the flow-control-stall deadline ONLY when the effective
2777                // send window is genuinely open — that alone is a real un-stall.
2778                // A window-stalled stream can flush a `WINDOW_UPDATE(+1)`-drip
2779                // byte HERE via socket-backpressure resume; clearing on that
2780                // would reset the deadline at 1-byte granularity and re-open the
2781                // drip the M2 cumulative-stall budget closes. While still blocked,
2782                // leave the deadline (and its progress accumulator) for the main
2783                // write loop's budget to govern — keeping the two maps in lockstep.
2784                if min(*parts.window, self.flow_control.window) > 0 {
2785                    self.stream_fc_stalled_since.remove(&stream_id);
2786                    self.stream_fc_stalled_progress.remove(&stream_id);
2787                }
2788            }
2789            if outcome == FlushOutcome::Stalled {
2790                return MuxResult::Continue;
2791            }
2792            self.expect_write = None;
2793            if (kawa.is_terminated() || kawa.is_error())
2794                && kawa.is_completed()
2795                && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2796            {
2797                let (client_rtt, server_rtt) = Self::snapshot_rtts(
2798                    &self.position,
2799                    &self.socket,
2800                    &endpoint,
2801                    stream.linked_token(),
2802                );
2803
2804                if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2805                    &self.position,
2806                    &mut self.bytes,
2807                    &self.streams,
2808                    stream,
2809                    global_stream_id,
2810                    stream_id,
2811                    byte_totals,
2812                    &mut context.debug,
2813                    context.listener.clone(),
2814                    client_rtt,
2815                    server_rtt,
2816                ) {
2817                    // Remove the recycled stream from the connection maps
2818                    // before endpoint.end_stream() can trigger teardown.
2819                    // Otherwise session close can observe a stale `Recycle`
2820                    // entry in self.streams and mis-handle the connection as
2821                    // if it still had an active H2 stream.
2822                    self.remove_dead_stream(dead_id, global_stream_id);
2823                    if let Some(token) = token {
2824                        remove_backend_stream(
2825                            &mut context.backend_streams,
2826                            token,
2827                            global_stream_id,
2828                        );
2829                        endpoint.end_stream(token, global_stream_id, context);
2830                    }
2831                }
2832            }
2833        }
2834
2835        self.gauge_connection_state();
2836
2837        let scheme: &'static [u8] = if context.listener.borrow().protocol() == Protocol::HTTPS {
2838            b"https"
2839        } else {
2840            b"http"
2841        };
2842        let mut completed_streams = Vec::new();
2843        let mut converter_buf = std::mem::take(&mut self.converter_buf);
2844        converter_buf.clear();
2845        let mut converter = converter::H2BlockConverter {
2846            max_frame_size: self.peer_settings.settings_max_frame_size as usize,
2847            window: 0,
2848            stream_id: 0,
2849            encoder: &mut self.encoder,
2850            out: converter_buf,
2851            scheme,
2852            lowercase_buf: std::mem::take(&mut self.lowercase_buf),
2853            cookie_buf: std::mem::take(&mut self.cookie_buf),
2854            // When this connection is a backend client we are writing
2855            // toward the upstream backend — flow-control stalls in that
2856            // direction are scoped to `backend.flow_control.paused` (in
2857            // addition to the existing direction-agnostic
2858            // `h2.flow_control_stall`).
2859            position_is_client: self.position.is_client(),
2860            // RFC 9218 §4: toggled per-stream in the loop below, driven by
2861            // `Prioriser::get(stream_id).1`. Non-incremental by default so
2862            // unit tests and non-scheduled callers (e.g. the resume path
2863            // above) keep the sequential semantics.
2864            incremental_mode: false,
2865            // Populated once per write pass from `apply_incremental_rotation`
2866            // below. The converter uses `incremental_peer_count <= 1` to skip
2867            // the RFC 9218 yield-after-one-DATA behaviour when there is no
2868            // peer to interleave with (solo-bucket fast path).
2869            incremental_peer_count: 0,
2870            // RFC 7541 §6.3: move the pending size-update onto the converter
2871            // so the first header block of this pass prepends the signal.
2872            // We clear the connection-side mirror only AFTER the write pass
2873            // confirms emission via `converter.size_update_emitted`, so a
2874            // DATA-only write pass (no header block) does not drop the
2875            // signal.
2876            pending_table_size_update: self.pending_table_size_update,
2877            size_update_emitted: false,
2878            // Reset on every write pass; `check_header_capacity` flips it
2879            // mid-call and `finalize` commits the abort by flipping
2880            // `kawa.parsing_phase` to Error so the next pass emits
2881            // RST_STREAM(InternalError).
2882            pending_oversized_abort: false,
2883        };
2884        self.priorities_buf.clear();
2885        self.priorities_buf.extend(self.streams.keys().copied());
2886        // RFC 9218 §4 primary sort: ascending urgency, then stream ID for
2887        // stability. The incremental flag is handled by
2888        // `apply_incremental_rotation` below so it does not perturb the
2889        // non-incremental fast path.
2890        self.priorities_buf.sort_by_cached_key(|id| {
2891            let (urgency, _) = self.prioriser.get(id);
2892            (urgency, *id)
2893        });
2894        // RFC 9218 §4: inside each urgency bucket, move incremental streams
2895        // to the tail and rotate them by the per-connection round-robin
2896        // cursor so no single slow-draining stream can starve its
2897        // same-urgency incremental peers.
2898        let incremental_count = self
2899            .prioriser
2900            .apply_incremental_rotation(&mut self.priorities_buf);
2901
2902        // RFC 9218 §4 refinement (Tier 3a): the connection-global
2903        // `incremental_count` is too coarse for `converter.incremental_peer_count`.
2904        // A solo `u=0, i` stream with an unrelated `u=7, i` peer in a
2905        // different urgency bucket would still see `incremental_peer_count > 1`
2906        // and voluntarily yield — stranding bytes the invariant-15/16 guards
2907        // were meant to prevent. Scope the count to same-urgency streams that
2908        // are actually ready to emit this pass (eligibility mirrors the check
2909        // in the write loop below).
2910        let mut ready_incremental_by_urgency: HashMap<u8, usize> = HashMap::new();
2911        for &sid in self.priorities_buf.iter() {
2912            let (urgency, is_incremental) = self.prioriser.get(&sid);
2913            if !is_incremental {
2914                continue;
2915            }
2916            let Some(&gid) = self.streams.get(&sid) else {
2917                continue;
2918            };
2919            let wbuffer = match self.position {
2920                Position::Server => &context.streams[gid].back,
2921                Position::Client(..) => &context.streams[gid].front,
2922            };
2923            if wbuffer.is_main_phase()
2924                || (wbuffer.is_terminated() && !wbuffer.is_completed())
2925                || (wbuffer.is_error() && !self.rst_sent.contains(&sid))
2926            {
2927                *ready_incremental_by_urgency.entry(urgency).or_insert(0) += 1;
2928            }
2929        }
2930
2931        trace!(
2932            "{} PRIORITIES: {:?} (incremental_count={}, per_bucket={:?})",
2933            log_context!(self),
2934            self.priorities_buf,
2935            incremental_count,
2936            ready_incremental_by_urgency
2937        );
2938        let mut socket_write = false;
2939        // RFC 9218 §4 round-robin: remember the first incremental stream we
2940        // served this pass so we can advance `Prioriser::incremental_cursor`
2941        // to it, causing the next pass to start with the stream just after.
2942        let mut first_incremental_fired: Option<StreamId> = None;
2943        // Total outbound bytes emitted across all stream flushes this pass —
2944        // `finalize_write` uses this to distinguish a voluntary scheduler
2945        // yield (progress + pending back-buffer, LIFECYCLE §9 invariant 16)
2946        // from a no-progress wait state (e.g. flow-control starvation).
2947        let mut total_bytes_written: usize = 0;
2948        // Collect every fresh RST_STREAM emitted via the converter
2949        // (`initialize` chokepoint or the HPACK over-budget abort path)
2950        // so we can run `account_emitted_rst` for each one AFTER the
2951        // converter is dropped — the converter holds `&mut self.encoder`
2952        // for the loop body so we cannot take `&mut self` until then.
2953        let mut freshly_emitted_rsts: Vec<H2Error> = Vec::new();
2954        'outer: for idx in 0..self.priorities_buf.len() {
2955            let stream_id = self.priorities_buf[idx];
2956            let Some(&global_stream_id) = self.streams.get(&stream_id) else {
2957                error!(
2958                    "{} stream_id {} from sorted keys missing in streams map",
2959                    log_context!(self),
2960                    stream_id
2961                );
2962                continue;
2963            };
2964            let (urgency, is_incremental) = self.prioriser.get(&stream_id);
2965            let stream = &mut context.streams[global_stream_id];
2966            let stream_state = stream.state;
2967            let parts = stream.split(&self.position);
2968            let kawa = parts.wbuffer;
2969            // Hoisted out of the gate below so the post-flush flow-control-stall
2970            // classification can see how many flow-control bytes this pass moved.
2971            let mut consumed: i32 = 0;
2972            if kawa.is_main_phase()
2973                || (kawa.is_terminated() && !kawa.is_completed())
2974                || (kawa.is_error() && !self.rst_sent.contains(&stream_id))
2975            {
2976                let window = min(*parts.window, self.flow_control.window);
2977                converter.window = window;
2978                converter.stream_id = stream_id;
2979                // RFC 9218 §4: incremental streams yield the converter after
2980                // a single DATA frame so same-urgency peers interleave.
2981                converter.incremental_mode = is_incremental;
2982                // Same-urgency-bucket ready-peer count (Tier 3a, LIFECYCLE §9
2983                // invariant 17). The converter skips the yield when there is
2984                // no peer in the same bucket to interleave with — prevents
2985                // the `finalize_write` WRITABLE-withdrawal strand (see
2986                // `test_h2_solo_incremental_drains_fully`). A connection-wide
2987                // count would wrongly yield for a solo incremental stream
2988                // when another urgency bucket happens to contain an
2989                // incremental peer.
2990                converter.incremental_peer_count = ready_incremental_by_urgency
2991                    .get(&urgency)
2992                    .copied()
2993                    .unwrap_or(0);
2994                // Track RST_STREAM dedup: if kawa is in error state, the converter
2995                // will generate a RST_STREAM frame via `initialize`. Mark it so we
2996                // don't send a duplicate on the next writable cycle.
2997                if kawa.is_error() {
2998                    let freshly_rst = self.rst_sent.insert(stream_id);
2999                    // LIFECYCLE §9 invariant 17: any transition to ineligible
3000                    // mid-pass MUST decrement ready_incremental_by_urgency so
3001                    // later streams in the same 'outer iteration see the live
3002                    // count, not the snapshot. Missing this costs one voluntary
3003                    // yield per same-urgency peer that trails the RST.
3004                    if freshly_rst && is_incremental {
3005                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3006                            *c = c.saturating_sub(1);
3007                        }
3008                    }
3009                    // Account for the RST that `initialize` is about to emit
3010                    // for this stream. Without this the MadeYouReset lifetime
3011                    // cap is evadable: any path that flips `parsing_phase` to
3012                    // Error before reaching this gate (oversized inbound
3013                    // trailers, malformed bodies, etc.) would land an
3014                    // unaccounted RST on the wire. We defer the actual
3015                    // accounting call until after `drop(converter)` — the
3016                    // converter holds `&mut self.encoder` here.
3017                    if freshly_rst {
3018                        freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
3019                    }
3020                }
3021                // Apply per-frontend response-side header edits
3022                // (set/replace/delete) stashed by the routing layer at
3023                // request time. H2 frontends always run as Server
3024                // position; the back-side H2 client (when sozu speaks
3025                // H2 to a backend) is a request emission and was
3026                // already mutated by Router::route_from_request.
3027                //
3028                // The snapshot is **drained** via `mem::take` so the
3029                // injection runs exactly once per response. Without
3030                // this, a re-entry of `write_streams` for the same
3031                // stream (multi-frame body, flow-control yield, or
3032                // RFC 9218 same-urgency round-robin) would re-call
3033                // `apply_response_header_edits` after `kawa.prepare`
3034                // had already consumed the `Block::Flags{end_header}`
3035                // anchor — the helper falls back to
3036                // `kawa.blocks.len()` and appends the edit AFTER all
3037                // remaining DATA blocks. The next prepare cycle then
3038                // encodes that orphan `Block::Header` into
3039                // `H2BlockConverter.out` with no closing
3040                // `Block::Flags{end_header}` to flush it as a HEADERS
3041                // frame, and `H2BlockConverter::finalize` trips the
3042                // "out buffer not empty (38 bytes remaining), clearing"
3043                // defense-in-depth log on every re-entry. 38 bytes is
3044                // the static-table HPACK encoding of a typical HSTS
3045                // header, which is how the symptom surfaces in
3046                // production once the listener-default HSTS reaches a
3047                // non-trivial share of frontends.
3048                if matches!(self.position, super::Position::Server)
3049                    && !parts.context.headers_response.is_empty()
3050                {
3051                    let edits = std::mem::take(&mut parts.context.headers_response);
3052                    super::shared::apply_response_header_edits(kawa, &edits);
3053                }
3054                kawa.prepare(&mut converter);
                // The pre-prepare gate above (`if kawa.is_error()`) only inserts into
3056                // `rst_sent` when `kawa.is_error()` is already true on
3057                // entry. The HPACK over-budget abort path
3058                // (`H2BlockConverter::check_header_capacity` →
3059                // `finalize`) flips `parsing_phase` to Error AND pushes
3060                // its own RST_STREAM frame inside this same prepare
3061                // pass; without a post-prepare insert here the next
3062                // writable cycle would gate-pass and double-emit a
3063                // RST_STREAM via the existing `initialize` chokepoint.
3064                //
3065                // Per Codex P2: the converter's direct RST emission
3066                // bypasses the metric/flood accounting that
3067                // `Self::reset_stream` performs. Mirror it here so a
3068                // peer that drives oversized headers across many
3069                // streams cannot escape the MadeYouReset emitted-RST
3070                // lifetime cap and so dashboards see the per-error
3071                // counter and the global tx counter.
3072                //
3073                // Per Codex P3: when an incremental stream flips to
3074                // Error mid-prepare, the RFC 9218 §4 yield-after-one
3075                // accounting must drop this stream from the
3076                // same-urgency ready bucket so trailing peers see the
3077                // live count.
3078                let freshly_rst_post_prepare = kawa.is_error() && self.rst_sent.insert(stream_id);
3079                if freshly_rst_post_prepare {
3080                    // Defer accounting until after `drop(converter)`; same
3081                    // reason as the pre-prepare collector above.
3082                    freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
3083                    if is_incremental {
3084                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3085                            *c = c.saturating_sub(1);
3086                        }
3087                    }
3088                }
3089                consumed = window - converter.window;
3090                *parts.window = parts.window.saturating_sub(consumed);
3091                self.flow_control.window = self.flow_control.window.saturating_sub(consumed);
3092                if is_incremental && consumed > 0 && first_incremental_fired.is_none() {
3093                    first_incremental_fired = Some(stream_id);
3094                }
3095            }
3096            context.debug.push(DebugEvent::S(
3097                stream_id,
3098                global_stream_id,
3099                kawa.parsing_phase,
3100                kawa.blocks.len(),
3101                kawa.out.len(),
3102            ));
3103            let mut stream_bytes: usize = 0;
3104            let outcome = Self::flush_stream_out(
3105                &mut self.socket,
3106                kawa,
3107                parts.metrics,
3108                &self.position,
3109                &mut self.readiness,
3110                &mut context.debug,
3111                3,
3112                global_stream_id,
3113                Some(&mut socket_write),
3114                None,
3115                &mut io_slices,
3116                Some(&mut stream_bytes),
3117            );
3118            // Refresh the per-stream idle timer on outbound bytes. Without
3119            // this, a long-running response trickled at low bandwidth would
3120            // be killed by `cancel_timed_out_streams` mid-delivery — the
3121            // inbound-only refresh at h2.rs:3887-3895 / 4026-4031 never
3122            // fires while the peer is idle.
3123            if stream_bytes > 0 {
3124                if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
3125                    *t = Instant::now();
3126                }
3127            }
3128            // Arm/age the dedicated flow-control-stall deadline that catches a
3129            // window-stalled stream — a buffered RESPONSE to a slow frontend
3130            // (`Position::Server`) OR a buffered request UPLOAD to a slow H2
3131            // backend (`Position::Client`): window-stall reaping is bidirectional
3132            // by design (M4), so there is no position gate here. Set only when the
3133            // stream holds sendable buffered data it cannot send because its
3134            // effective send window is exhausted; unlike `stream_last_activity_at`
3135            // it is NEVER refreshed by inbound DATA/HEADERS, so a peer dribbling
3136            // 1-byte DATA cannot keep it warm.
3137            //
3138            // M2 cumulative-stall budget: a genuinely OPEN window clears the
3139            // deadline immediately (real un-stall). While the window stays
3140            // blocked, accumulate this pass's outbound drain; only cumulative
3141            // progress reaching `FC_STALL_CLEAR_FLOOR` (a full frame of real
3142            // delivery) clears it. A `WINDOW_UPDATE(+1)` drip drains ~1 byte/pass
3143            // straight back to a zero window, so it never reaches the floor — the
3144            // deadline ages out and `cancel_timed_out_streams` RST(CANCEL)s the
3145            // slot-pinning stream after `stream_idle_timeout`.
3146            let outbound_window_blocked = has_sendable_response(kawa)
3147                && min(*parts.window, self.flow_control.window) <= 0
3148                && (!kawa.blocks.is_empty() || !kawa.out.is_empty());
3149            match fc_stall_budget_decision(
3150                outbound_window_blocked,
3151                consumed,
3152                self.stream_fc_stalled_progress.get(&stream_id).copied(),
3153            ) {
3154                FcStallAction::Clear => {
3155                    self.stream_fc_stalled_since.remove(&stream_id);
3156                    self.stream_fc_stalled_progress.remove(&stream_id);
3157                }
3158                FcStallAction::Arm { progress } => {
3159                    self.stream_fc_stalled_since
3160                        .entry(stream_id)
3161                        .or_insert_with(Instant::now);
3162                    self.stream_fc_stalled_progress.insert(stream_id, progress);
3163                }
3164            }
3165            total_bytes_written = total_bytes_written.saturating_add(stream_bytes);
3166            if outcome == FlushOutcome::Stalled {
3167                self.expect_write = Some(H2StreamId::Other {
3168                    id: stream_id,
3169                    gid: global_stream_id,
3170                });
3171                break 'outer;
3172            }
3173            self.expect_write = None;
3174            if (kawa.is_terminated() || kawa.is_error())
3175                && kawa.is_completed()
3176                && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
3177            {
3178                let close_frontend =
3179                    matches!(self.position, Position::Server) && !parts.context.keep_alive_frontend;
3180                let (client_rtt, server_rtt) = Self::snapshot_rtts(
3181                    &self.position,
3182                    &self.socket,
3183                    &endpoint,
3184                    stream.linked_token(),
3185                );
3186
3187                if let Some((dead_id, token)) = Self::try_recycle_server_stream(
3188                    &self.position,
3189                    &mut self.bytes,
3190                    &self.streams,
3191                    stream,
3192                    global_stream_id,
3193                    stream_id,
3194                    byte_totals,
3195                    &mut context.debug,
3196                    context.listener.clone(),
3197                    client_rtt,
3198                    server_rtt,
3199                ) {
3200                    completed_streams.push((dead_id, global_stream_id, token, close_frontend));
3201                    // LIFECYCLE §9 invariant 17: decrement INSIDE 'outer so
3202                    // later iterations see the reduced count. The post-loop
3203                    // retirement at remove_dead_stream is too late.
3204                    if is_incremental {
3205                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3206                            *c = c.saturating_sub(1);
3207                        }
3208                    }
3209                }
3210            }
3211        }
3212        gauge!(
3213            "h2.streams.ready_incremental.by_urgency",
3214            ready_incremental_by_urgency
3215                .values()
3216                .copied()
3217                .sum::<usize>()
3218        );
3219        // Reclaim the converter's reusable buffers before any &mut self calls,
3220        // since the converter borrows self.encoder.
3221        let converter_out = std::mem::take(&mut converter.out);
3222        let lowercase_buf = std::mem::take(&mut converter.lowercase_buf);
3223        let cookie_buf = std::mem::take(&mut converter.cookie_buf);
3224        // RFC 7541 §6.3: clear our mirror of the pending size-update only
3225        // AFTER the converter confirmed the signal was emitted to its
3226        // output buffer. A DATA-only pass leaves `size_update_emitted` as
3227        // `false` so the signal stays queued for the next pass with a
3228        // header block.
3229        let size_update_emitted = converter.size_update_emitted;
3230        drop(converter);
3231        if size_update_emitted {
3232            self.pending_table_size_update = None;
3233        }
3234        // Account every RST that the converter emitted during this pass
3235        // (pre-prepare gate + post-prepare HPACK over-budget abort) so
3236        // the global tx counter, the per-error breakdown, and the
3237        // MadeYouReset emitted-RST lifetime cap stay in step. If the
3238        // cap trips, propagate the GOAWAY result.
3239        for error in freshly_emitted_rsts {
3240            if let Some(result) = self.account_emitted_rst(error) {
3241                return result;
3242            }
3243        }
3244        self.converter_buf = converter_out;
3245        self.lowercase_buf = lowercase_buf;
3246        self.cookie_buf = cookie_buf;
3247        self.shrink_converter_buffers();
3248        // RFC 9218 §4: commit the round-robin cursor so the next writable
3249        // cycle begins with the stream immediately after the one we fired
3250        // first this pass.
3251        self.prioriser
3252            .advance_incremental_cursor(first_incremental_fired);
3253        let mut close_frontend_after_completed_stream = false;
3254        for (dead_id, global_stream_id, token, close_frontend) in completed_streams {
3255            // The main write loop borrows self.encoder, so we can't mutate the
3256            // H2 maps inline. Retire the recycled stream immediately after the
3257            // converter borrow ends, before endpoint.end_stream() can trigger
3258            // teardown and observe a stale `Recycle` entry in self.streams.
3259            self.remove_dead_stream(dead_id, global_stream_id);
3260            close_frontend_after_completed_stream |= close_frontend;
3261            if let Some(token) = token {
3262                remove_backend_stream(&mut context.backend_streams, token, global_stream_id);
3263                endpoint.end_stream(token, global_stream_id, context);
3264            }
3265        }
3266        if close_frontend_after_completed_stream && !self.drain.draining {
3267            return if self.streams.is_empty() {
3268                self.goaway(H2Error::NoError)
3269            } else {
3270                self.graceful_goaway()
3271            };
3272        }
3273        self.finalize_write(socket_write, total_bytes_written, context)
3274    }
3275
3276    /// Remove streams that completed their lifecycle from all tracking maps.
3277    /// After forwarding a 1xx informational response (100 Continue, 103 Early Hints),
3278    /// reset the back buffer and re-enable backend readable so the final response
3279    /// can arrive on the same stream. Returns true if the response was 1xx.
3280    #[allow(clippy::too_many_arguments)]
3281    fn flush_stream_out(
3282        socket: &mut Front,
3283        kawa: &mut GenericHttpStream,
3284        metrics: &mut SessionMetrics,
3285        position: &Position,
3286        readiness: &mut Readiness,
3287        debug: &mut DebugHistory,
3288        debug_site: usize,
3289        global_stream_id: GlobalStreamId,
3290        mut wrote: Option<&mut bool>,
3291        cross_read_amount: Option<usize>,
3292        io_slices: &mut Vec<IoSlice<'static>>,
3293        mut bytes_written: Option<&mut usize>,
3294    ) -> FlushOutcome {
3295        while !kawa.out.is_empty() {
3296            if let Some(flag) = wrote.as_deref_mut() {
3297                *flag = true;
3298            }
3299            io_slices.clear();
3300            let buffer = kawa.storage.buffer();
3301            for block in kawa.out.iter() {
3302                match block {
3303                    kawa::OutBlock::Delimiter => break,
3304                    kawa::OutBlock::Store(store) => {
3305                        let data = store.data(buffer);
3306                        // SAFETY: the IoSlice references point into kawa's
3307                        // storage buffer. They are used only for the
3308                        // socket_write_vectored call below and cleared
3309                        // immediately after, before kawa.consume() which may
3310                        // relocate the buffer via ptr::copy (shift). No
3311                        // dangling 'static refs exist during consume().
3312                        let data: &'static [u8] =
3313                            unsafe { std::slice::from_raw_parts(data.as_ptr(), data.len()) };
3314                        io_slices.push(IoSlice::new(data));
3315                    }
3316                }
3317            }
3318            let (size, status) = socket.socket_write_vectored(io_slices);
3319            io_slices.clear();
3320            debug_assert!(
3321                io_slices.is_empty(),
3322                "IoSlice refs must be cleared before consume"
3323            );
3324            debug.push(DebugEvent::SocketIO(debug_site, global_stream_id, size));
3325            kawa.consume(size);
3326            position.count_bytes_out_counter(size);
3327            position.count_bytes_out(metrics, size);
3328            if let Some(counter) = bytes_written.as_deref_mut() {
3329                *counter = counter.saturating_add(size);
3330            }
3331            if let Some(amount) = cross_read_amount {
3332                // Resume path: same stream is parked waiting for buffer space.
3333                // Re-enable READABLE once the write freed enough room.
3334                if kawa.storage.available_space() >= amount {
3335                    readiness.interest.insert(Ready::READABLE);
3336                }
3337            }
3338            if update_readiness_after_write(size, status, readiness) {
3339                return FlushOutcome::Stalled;
3340            }
3341        }
3342        FlushOutcome::Drained
3343    }
3344
3345    fn handle_1xx_reset<E: Endpoint>(
3346        kawa: &mut GenericHttpStream,
3347        stream_state: StreamState,
3348        endpoint: &mut E,
3349    ) -> bool {
3350        let is_1xx = matches!(
3351            kawa.detached.status_line,
3352            kawa::StatusLine::Response { code, .. } if (100..200).contains(&code)
3353        );
3354        if !is_1xx {
3355            return false;
3356        }
3357        debug!(
3358            "{} H2 write_streams: 1xx informational forwarded, resetting back buffer",
3359            log_module_context!()
3360        );
3361        kawa.clear();
3362        if let StreamState::Linked(token) = stream_state {
3363            let readiness = endpoint.readiness_mut(token);
3364            readiness.interest.insert(Ready::READABLE);
3365            readiness.signal_pending_read();
3366        }
3367        true
3368    }
3369
3370    /// Re-arm edge-triggered WRITABLE event if rustls still has buffered TLS data.
3371    fn ensure_tls_flushed(&mut self) {
3372        if self.socket.socket_wants_write() {
3373            self.readiness.signal_pending_write();
3374        }
3375    }
3376
3377    /// Evict every per-stream piece of state carried by this `ConnectionH2`.
3378    ///
3379    /// **Invariant**: `rst_sent`, `stream_last_activity_at`,
3380    /// `stream_fc_stalled_since`, `stream_fc_stalled_progress` and `prioriser`
3381    /// MUST be emptied of `stream_id` here — they are the only five per-stream
3382    /// caches that are not stored in the slab-allocated
3383    /// `Context.streams[]`. Forgetting any of them causes unbounded memory
3384    /// growth on long-lived connections with many cancelled streams. The
3385    /// `debug_assert`s below fail loudly in test builds if someone adds a
3386    /// new per-stream cache without updating this function.
3387    fn remove_dead_stream(&mut self, stream_id: StreamId, global_stream_id: GlobalStreamId) {
3388        if self.streams.remove(&stream_id).is_none() {
3389            error!(
3390                "{} dead stream_id {} missing from streams map",
3391                log_context!(self),
3392                stream_id
3393            );
3394        }
3395        self.rst_sent.remove(&stream_id);
3396        self.stream_last_activity_at.remove(&stream_id);
3397        self.stream_fc_stalled_since.remove(&stream_id);
3398        self.stream_fc_stalled_progress.remove(&stream_id);
3399        self.prioriser.remove(&stream_id);
3400        debug_assert!(
3401            !self.rst_sent.contains(&stream_id),
3402            "rst_sent still contains stream_id {stream_id} after eviction"
3403        );
3404        debug_assert!(
3405            !self.stream_last_activity_at.contains_key(&stream_id),
3406            "stream_last_activity_at still contains stream_id {stream_id} after eviction"
3407        );
3408        debug_assert!(
3409            !self.stream_fc_stalled_since.contains_key(&stream_id),
3410            "stream_fc_stalled_since still contains stream_id {stream_id} after eviction"
3411        );
3412        debug_assert!(
3413            !self.stream_fc_stalled_progress.contains_key(&stream_id),
3414            "stream_fc_stalled_progress still contains stream_id {stream_id} after eviction"
3415        );
3416        // Invariant: expect_write/expect_read must not reference a gid whose
3417        // context slot may be popped by shrink_trailing_recycle after eviction.
3418        if matches!(self.expect_write, Some(H2StreamId::Other { gid, .. }) if gid == global_stream_id)
3419        {
3420            self.expect_write = None;
3421        }
3422        if matches!(
3423            self.expect_read,
3424            Some((H2StreamId::Other { gid, .. }, _)) if gid == global_stream_id
3425        ) {
3426            self.expect_read = None;
3427        }
3428    }
3429
3430    /// Drop stream-id mappings for streams that never became active before a
3431    /// connection-level close. This happens on incomplete/oversized header
3432    /// blocks: the stream slot is created on the initial HEADERS frame, then a
3433    /// GOAWAY closes the connection before the request is fully materialized.
3434    fn prune_inactive_streams_while_closing<L>(&mut self, context: &mut Context<L>)
3435    where
3436        L: ListenerHandler + L7ListenerHandler,
3437    {
3438        if !self.drain.draining || !matches!(self.state, H2State::GoAway | H2State::Error) {
3439            return;
3440        }
3441
3442        let stale_streams = self
3443            .streams
3444            .iter()
3445            .filter_map(|(&stream_id, &global_stream_id)| {
3446                (!context.streams[global_stream_id].state.is_open())
3447                    .then_some((stream_id, global_stream_id))
3448            })
3449            .collect::<Vec<_>>();
3450
3451        for (stream_id, global_stream_id) in stale_streams {
3452            let stream = &mut context.streams[global_stream_id];
3453            if stream.state == StreamState::Idle {
3454                stream.front.clear();
3455                stream.front.storage.clear();
3456                stream.back.clear();
3457                stream.back.storage.clear();
3458                stream.metrics.reset();
3459                stream.state = StreamState::Recycle;
3460            }
3461            self.remove_dead_stream(stream_id, global_stream_id);
3462        }
3463    }
3464
3465    /// Shrink reusable converter buffers when they grow beyond 16 KB to avoid
3466    /// holding memory after a burst of large headers.
3467    fn shrink_converter_buffers(&mut self) {
3468        if self.converter_buf.capacity() > 16_384 {
3469            self.converter_buf.shrink_to(4096);
3470        }
3471        if self.lowercase_buf.capacity() > 16_384 {
3472            self.lowercase_buf.shrink_to(4096);
3473        }
3474        if self.cookie_buf.capacity() > 16_384 {
3475            self.cookie_buf.shrink_to(4096);
3476        }
3477    }
3478
3479    /// Post-write phase: check drain completion, flush TLS, and update readiness.
3480    ///
3481    /// `bytes_written_this_pass` reports the total outbound bytes `write_streams`
3482    /// pushed to the socket (across every stream), and is used to distinguish
3483    /// two very different "no `expect_write`" states:
3484    ///
3485    /// - **Voluntary yield with progress**: at least one DATA/HEADERS frame
3486    ///   emitted, but a stream left non-empty `back.out`/`back.blocks` because
3487    ///   the converter yielded (e.g. RFC 9218 incremental rotation). LIFECYCLE
3488    ///   §9 invariant 16: keep `Ready::WRITABLE` armed so the session loop can
3489    ///   resume flushing on the next tick without waiting for an external
3490    ///   wake-up that edge-triggered epoll will not deliver.
3491    /// - **No progress at all**: converter pushed every block back (e.g. flow
3492    ///   window exhausted, no HEADERS ready yet). Strip `Ready::WRITABLE` —
3493    ///   forward progress must come from an external trigger
3494    ///   (`WINDOW_UPDATE`, new request), not from looping writable().
3495    ///
3496    /// Returns `MuxResult::Continue` in the normal case, or triggers a graceful
3497    /// GOAWAY when draining and all streams have completed.
3498    fn finalize_write<L>(
3499        &mut self,
3500        socket_write: bool,
3501        bytes_written_this_pass: usize,
3502        context: &mut Context<L>,
3503    ) -> MuxResult
3504    where
3505        L: ListenerHandler + L7ListenerHandler,
3506    {
3507        // RFC 9113 §6.8: if draining and all streams have completed,
3508        // send the final GOAWAY with the actual last_stream_id
3509        if self.drain.draining && self.streams.is_empty() {
3510            return self.graceful_goaway();
3511        }
3512
3513        if self.socket.socket_wants_write() {
3514            if !socket_write {
3515                self.socket.socket_write(&[]);
3516            }
3517            // Edge-triggered epoll: re-arm WRITABLE if rustls still has
3518            // pending encrypted data (first check triggers flush, second re-checks).
3519            self.ensure_tls_flushed();
3520        } else if self.expect_write.is_none() {
3521            // LIFECYCLE §9 invariant 16: retain `Ready::WRITABLE` when a
3522            // voluntary scheduler yield leaves stranded bytes in a stream's
3523            // `back.out`/`back.blocks` *after* the pass made forward
3524            // progress. Requiring progress avoids the degenerate no-progress
3525            // loop (e.g. flow-control-starved streams) that would otherwise
3526            // busy-spin against the session dispatcher.
3527            if bytes_written_this_pass > 0
3528                && any_stream_has_pending_back(&self.streams, &context.streams)
3529            {
3530                #[cfg(debug_assertions)]
3531                context.debug.push(DebugEvent::Str(
3532                    "finalize_write: invariant 16 retained WRITABLE (pending back-buffer)"
3533                        .to_owned(),
3534                ));
3535            } else if !self.pending_rst_streams.is_empty()
3536                || !self.flow_control.pending_window_updates.is_empty()
3537            {
3538                // Control-frame liveness: `flush_pending_control_frames` is
3539                // gated on `expect_write.is_none()`, so when a prior partial
3540                // write deferred the flush the RST / WINDOW_UPDATE queues
3541                // stay non-empty after `expect_write` finally drains. Without
3542                // this rearm the next tick would drop `Ready::WRITABLE` and
3543                // the queued RST would stall until an unrelated event
3544                // re-triggered writable — which is exactly the scenario
3545                // h2spec trips by sending back-to-back malformed streams.
3546                #[cfg(debug_assertions)]
3547                context.debug.push(DebugEvent::Str(
3548                    "finalize_write: retained WRITABLE (control queue non-empty)".to_owned(),
3549                ));
3550                self.readiness.arm_writable();
3551                incr!(names::h2::SIGNAL_WRITABLE_REARMED_CONTROL_QUEUE);
3552            } else {
3553                // We wrote everything
3554                #[cfg(debug_assertions)]
3555                context.debug.push(DebugEvent::Str(format!(
3556                    "Wrote everything: {:?}",
3557                    self.streams
3558                )));
3559                self.readiness.interest.remove(Ready::WRITABLE);
3560            }
3561        }
3562        MuxResult::Continue
3563    }
3564
3565    /// Flush pending control frames (zero-buffer resume, WINDOW_UPDATEs, RST_STREAMs)
3566    /// before entering the main writable state machine.
3567    ///
3568    /// Returns `Some(result)` if the caller should return early (e.g. socket would
3569    /// block, GOAWAY triggered), or `None` if writable() should proceed normally.
3570    fn flush_pending_control_frames(&mut self) -> Option<MuxResult> {
3571        if self.frontend_hung_up_while_draining() {
3572            self.expect_write = None;
3573            self.zero.storage.clear();
3574            self.flow_control.pending_window_updates.clear();
3575            self.pending_rst_streams.clear();
3576        }
3577
3578        // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
3579        if let Some(sent_at) = self.settings_sent_at {
3580            if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
3581                warn!(
3582                    "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
3583                    log_context!(self),
3584                    SETTINGS_ACK_TIMEOUT
3585                );
3586                return Some(self.goaway(H2Error::SettingsTimeout));
3587            }
3588        }
3589
3590        // Stage — resume zero-buffer flush.
3591        // If a previous write was partial, finish it before serialising any
3592        // new control frames. Don't reset the timeout for control frame
3593        // writes (SETTINGS ACK, PING response, WINDOW_UPDATE) — only
3594        // application-data writes should reset it.
3595        if let Some(H2StreamId::Zero) = self.expect_write {
3596            if self.flush_zero_to_socket() {
3597                self.ensure_tls_flushed();
3598                return Some(MuxResult::Continue);
3599            }
3600            // When H2StreamId::Zero is used to write, READABLE is disabled —
3601            // re-enable it now that the flush is complete.
3602            self.readiness.interest.insert(Ready::READABLE);
3603            self.expect_write = None;
3604        }
3605
3606        // Stage — drain pending WINDOW_UPDATE frames.
3607        // Serialize and flush them inline to avoid extra event loop
3608        // iterations that could cause response data to be sent before
3609        // subsequent frames are validated.
3610        if !self.flow_control.pending_window_updates.is_empty() && self.expect_write.is_none() {
3611            let kawa = &mut self.zero;
3612            kawa.storage.clear();
3613            let buf = kawa.storage.space();
3614            let mut offset = 0;
3615            // Track which entries we successfully serialized so we can remove them.
3616            // Each WINDOW_UPDATE frame is 13 bytes (9-byte header + 4-byte payload).
3617            let mut written_ids = Vec::new();
3618            for (&stream_id, &increment) in &self.flow_control.pending_window_updates {
3619                if increment == 0 {
3620                    written_ids.push(stream_id);
3621                    continue;
3622                }
3623                match serializer::gen_window_update(&mut buf[offset..], stream_id, increment) {
3624                    Ok((_, size)) => {
3625                        offset += size;
3626                        written_ids.push(stream_id);
3627                        incr!(names::h2::FRAMES_TX_WINDOW_UPDATE);
3628                    }
3629                    Err(_) => {
3630                        // Buffer full — stop here, remaining entries stay in the map
3631                        break;
3632                    }
3633                }
3634            }
3635            // Remove only the entries we successfully wrote (or skipped)
3636            for id in written_ids {
3637                self.flow_control.pending_window_updates.remove(&id);
3638            }
3639            if offset > 0 {
3640                kawa.storage.fill(offset);
3641                if self.flush_zero_to_socket() {
3642                    self.expect_write = Some(H2StreamId::Zero);
3643                    // Edge-triggered epoll: ensure pending TLS data gets flushed
3644                    if self.socket.socket_wants_write() {
3645                        self.readiness.event.insert(Ready::WRITABLE);
3646                    }
3647                    return Some(MuxResult::Continue);
3648                }
3649            }
3650        }
3651
3652        // Stage — RST_STREAM cap check + drain.
3653        // Check the lifetime total (not just pending queue length) because
3654        // writable() drains the queue between readable() calls, so the
3655        // pending count alone may never reach the cap even under sustained
3656        // misbehavior.
3657        if !matches!(self.state, H2State::GoAway | H2State::Error)
3658            && self.total_rst_streams_queued >= MAX_PENDING_RST_STREAMS
3659        {
3660            error!(
3661                "{} total RST_STREAM count {} exceeds cap {}, sending GOAWAY(ENHANCE_YOUR_CALM)",
3662                log_context!(self),
3663                self.total_rst_streams_queued,
3664                MAX_PENDING_RST_STREAMS
3665            );
3666            return Some(self.goaway(H2Error::EnhanceYourCalm));
3667        }
3668
3669        // Flush pending RST_STREAM frames (queued when refusing streams).
3670        // Accounting happens at queue-time inside `Self::enqueue_rst`, so
3671        // this drain only serialises and flushes — no metric/flood calls
3672        // here would double-count.
3673        if !self.pending_rst_streams.is_empty() && self.expect_write.is_none() {
3674            let kawa = &mut self.zero;
3675            kawa.storage.clear();
3676            let buf = kawa.storage.space();
3677            let mut offset = 0;
3678            let mut written_count = 0;
3679            for &(stream_id, ref error) in &self.pending_rst_streams {
3680                let frame_size =
3681                    parser::FRAME_HEADER_SIZE + parser::RST_STREAM_PAYLOAD_SIZE as usize;
3682                if offset + frame_size > buf.len() {
3683                    break;
3684                }
3685                match serializer::gen_rst_stream(&mut buf[offset..], stream_id, error.to_owned()) {
3686                    Ok((_, _)) => {
3687                        offset += frame_size;
3688                        written_count += 1;
3689                    }
3690                    Err(_) => break,
3691                }
3692            }
3693            self.pending_rst_streams.drain(..written_count);
3694            if offset > 0 {
3695                kawa.storage.fill(offset);
3696                if self.flush_zero_to_socket() {
3697                    self.expect_write = Some(H2StreamId::Zero);
3698                    // Edge-triggered epoll: ensure pending TLS data gets flushed
3699                    if self.socket.socket_wants_write() {
3700                        self.readiness.event.insert(Ready::WRITABLE);
3701                    }
3702                    return Some(MuxResult::Continue);
3703                }
3704            }
3705        }
3706
3707        None
3708    }
3709
    /// Write-side entry point for this H2 connection.
    ///
    /// Runs, in order:
    /// 1. `prune_inactive_streams_while_closing`;
    /// 2. `flush_pending_control_frames`, returning its result immediately
    ///    when it yields one;
    /// 3. a best-effort `socket_write(&[])` when the socket reports pending
    ///    output;
    /// 4. a dispatch on `(state, position)`: handshake states emit or
    ///    acknowledge the preface/SETTINGS exchange, `Error`/`GoAway` drain
    ///    pending output before closing, and the proxying states delegate to
    ///    `write_streams`.
    ///
    /// `endpoint` is only consumed by the proxying arm, which forwards it to
    /// `write_streams` together with `context`.
    ///
    /// Returns the [`MuxResult`] produced by whichever step terminated the
    /// call.
    pub fn writable<E, L>(&mut self, context: &mut Context<L>, endpoint: E) -> MuxResult
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        self.prune_inactive_streams_while_closing(context);

        // Control frames go first; a `Some` result (e.g. a GOAWAY or a
        // partially flushed control buffer) ends this pass.
        if let Some(result) = self.flush_pending_control_frames() {
            return result;
        }

        // Flush any pending TLS records before state-specific processing.
        // This ensures response DATA frames that were accepted by rustls
        // (via socket_write_vectored in write_streams) are pushed to the
        // TCP socket even when the connection is in GoAway or Error state.
        // Without this, the state-specific handlers may call force_disconnect()
        // before the response data reaches the kernel's TCP send buffer.
        if self.socket.socket_wants_write() {
            self.socket.socket_write(&[]);
        }

        match (&self.state, &self.position) {
            // Frontend connection in error: keep the session alive only for
            // as long as the socket still has output to push.
            (H2State::Error, Position::Server) => {
                if self.socket.socket_wants_write() {
                    self.ensure_tls_flushed();
                    MuxResult::Continue
                } else {
                    MuxResult::CloseSession
                }
            }
            // Combinations that should never see a writable pass: log and
            // tear the connection down.
            (H2State::Error, _)
            | (H2State::ClientSettings, Position::Server)
            | (H2State::ServerSettings, Position::Client(..)) => {
                error!(
                    "{} Unexpected combination: (Writable, {:?}, {:?})",
                    log_context!(self),
                    self.state,
                    self.position
                );
                self.force_disconnect()
            }
            // Server side still waiting for the client preface: nothing to write.
            (H2State::ClientPreface, Position::Server) => MuxResult::Continue,
            // Discard state: pending data (e.g. RST_STREAM) was already
            // written in the preamble above; let the readable path consume
            // the remaining frame payload.
            (H2State::Discard, _) => MuxResult::Continue,
            (H2State::GoAway, _) => {
                if self.peer_gone_after_final_goaway() {
                    return MuxResult::CloseSession;
                }
                // Flush any remaining TLS response data before disconnecting.
                // The GoAway state only enters after control frames (our GOAWAY
                // response) are flushed above, but response DATA frames may still
                // be in rustls's TLS output buffer — accepted by socket_write_vectored
                // during write_streams() but not yet flushed to TCP. Under TCP
                // backpressure (HAProxy chain), this is the primary truncation vector.
                if self.socket.socket_wants_write() {
                    self.socket.socket_write(&[]);
                    if self.socket.socket_wants_write() {
                        // TLS data still pending (TCP backpressure) — don't disconnect
                        // yet. Re-arm WRITABLE so the event loop retries the flush.
                        self.ensure_tls_flushed();
                        return MuxResult::Continue;
                    }
                }
                self.force_disconnect()
            }
            // Backend connection, first pass: stage the connection preface
            // followed by our SETTINGS frame in the connection-level buffer.
            (H2State::ClientPreface, Position::Client(..)) => {
                trace!("{} Preparing preface and settings", log_context!(self));
                let pri = serializer::H2_PRI.as_bytes();
                let kawa = &mut self.zero;

                // NOTE(review): indexing assumes the free space is at least
                // `pri.len()` bytes; a smaller buffer would panic here.
                kawa.storage.space()[0..pri.len()].copy_from_slice(pri);
                kawa.storage.fill(pri.len());
                match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
                    Ok((_, size)) => {
                        kawa.storage.fill(size);
                        incr!(names::h2::FRAMES_TX_SETTINGS);
                        // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
                        self.settings_sent_at = Some(Instant::now());
                    }
                    Err(error) => {
                        error!(
                            "{} Could not serialize SettingsFrame: {:?}",
                            log_context!(self),
                            error
                        );
                        return self.force_disconnect();
                    }
                };

                // The staged bytes are written on a later pass through the
                // connection-level (`Zero`) write expectation.
                self.state = H2State::ClientSettings;
                self.expect_write = Some(H2StreamId::Zero);
                MuxResult::Continue
            }
            // Backend connection, preface and SETTINGS are out: switch to
            // waiting for a 9-byte read on the connection-level stream and
            // drop WRITABLE interest.
            (H2State::ClientSettings, Position::Client(..)) => {
                trace!("{} Sent preface and settings", log_context!(self));
                self.state = H2State::ServerSettings;
                self.expect_read = Some((H2StreamId::Zero, 9));
                self.readiness.interest.remove(Ready::WRITABLE);
                MuxResult::Continue
            }
            (H2State::ServerSettings, Position::Server) => {
                // Enlarge the connection-level receive window beyond the RFC default
                // of 65 535 bytes. That default is too small for high-throughput
                // proxying and causes excessive WINDOW_UPDATE round-trips. Queue
                // only the difference between the configured window and the
                // default, so the update is additive rather than an unconditional
                // assignment and preserves any window changes that occurred during
                // setup. Skip if the configured window does not exceed the default
                // (no enlargement needed), since a zero-increment WINDOW_UPDATE
                // violates RFC 9113 §6.9.
                let increment = self
                    .connection_config
                    .initial_connection_window
                    .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
                if increment > 0 {
                    self.queue_window_update(0, increment);
                }
                // Do NOT increment flow_control.window here: sending our own
                // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
                // Our send window is only updated by WINDOW_UPDATEs we receive
                // from the peer (RFC 9113 §6.9).
                self.expect_header();
                // Keep WRITABLE so the queued WINDOW_UPDATE gets flushed.
                MuxResult::Continue
            }
            // Proxying states — writing application data (request/response).
            // The timeout is deliberately not reset at the top of writable(),
            // so that control frame writes (PING, WINDOW_UPDATE) don't reset it.
            // NOTE(review): the reset itself is not visible in this function —
            // presumably it happens inside `write_streams`; confirm.
            (H2State::Header, _)
            | (H2State::Frame(_), _)
            | (H2State::ContinuationFrame(_), _)
            | (H2State::ContinuationHeader(_), _) => self.write_streams(context, endpoint),
        }
    }
3845
3846    /// Snapshot the access-log RTTs for the local frontend and the linked backend.
3847    ///
3848    /// `Position::Server`-only. On a backend H2 connection (`Position::Client`)
3849    /// the snapshot would write swapped values onto the shared `Stream.metrics`:
3850    /// the connection's `socket` is the upstream and the corresponding
3851    /// `EndpointServer::socket` returns the frontend, so the per-stream
3852    /// `client_rtt`/`server_rtt` cells would be populated with mislabelled
3853    /// values. Gating keeps backend H2 from poisoning the access-log metric
3854    /// for the matching frontend stream.
3855    ///
3856    /// Callers must invoke this BEFORE `endpoint.end_stream(...)` on reset
3857    /// paths so the backend lookup does not depend on
3858    /// `EndpointClient::end_stream` continuing to leave entries in
3859    /// `Router.backends`.
3860    ///
3861    /// Takes individual field references (not `&self`) for the same reason
3862    /// `try_recycle_server_stream` does — to avoid borrow conflicts with the
3863    /// `H2BlockConverter` that holds `&mut self.encoder` during the per-stream
3864    /// write loop.
3865    fn snapshot_rtts<E: Endpoint>(
3866        position: &Position,
3867        socket: &Front,
3868        endpoint: &E,
3869        linked_token: Option<mio::Token>,
3870    ) -> (Option<Duration>, Option<Duration>) {
3871        if !position.is_server() {
3872            return (None, None);
3873        }
3874        (
3875            socket_rtt(socket.socket_ref()),
3876            linked_token
3877                .and_then(|t| endpoint.socket(t))
3878                .and_then(socket_rtt),
3879        )
3880    }
3881
3882    /// Try to recycle a completed server-side stream by distributing overhead,
3883    /// generating access logs, and transitioning the stream to `Recycle` state.
3884    ///
3885    /// Returns `Some((stream_id, Option<token>))` if the stream was recycled, so the
3886    /// caller can add `stream_id` to the dead-streams list and call `endpoint.end_stream()`
3887    /// if a token was returned. Returns `None` if recycling was deferred or not applicable.
3888    ///
3889    /// Takes individual field references instead of `&mut self` to avoid borrow
3890    /// conflicts when the H2 block converter holds `&mut self.encoder`.
3891    /// `client_rtt`/`server_rtt` are snapshotted by the caller (which still
3892    /// owns `&self.socket` and `&endpoint`) and forwarded into the access log.
3893    #[allow(clippy::too_many_arguments)]
3894    fn try_recycle_server_stream<L>(
3895        position: &Position,
3896        bytes: &mut H2ByteAccounting,
3897        streams: &HashMap<StreamId, GlobalStreamId>,
3898        stream: &mut crate::protocol::mux::Stream,
3899        global_stream_id: GlobalStreamId,
3900        stream_id: StreamId,
3901        byte_totals: (usize, usize),
3902        debug: &mut DebugHistory,
3903        listener: std::rc::Rc<std::cell::RefCell<L>>,
3904        client_rtt: Option<Duration>,
3905        server_rtt: Option<Duration>,
3906    ) -> Option<(StreamId, Option<mio::Token>)>
3907    where
3908        L: ListenerHandler + L7ListenerHandler,
3909    {
3910        match position {
3911            Position::Client(..) => None,
3912            Position::Server => {
3913                // Already logged by a reset path; retire the stream after its RST is flushed.
3914                if stream.metrics.start.is_none() {
3915                    let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3916                    return match state {
3917                        StreamState::Linked(token) => Some((stream_id, Some(token))),
3918                        _ => Some((stream_id, None)),
3919                    };
3920                }
3921
3922                // Don't recycle if the client hasn't sent END_STREAM yet —
3923                // more DATA frames may arrive for this stream.
3924                if !stream.front_received_end_of_stream {
3925                    trace!(
3926                        "{} Defer recycle stream {}: client still sending",
3927                        log_module_context!(),
3928                        global_stream_id
3929                    );
3930                    return None;
3931                }
3932                let stream_bytes = (
3933                    stream.metrics.bin + stream.metrics.backend_bin,
3934                    stream.metrics.bout + stream.metrics.backend_bout,
3935                );
3936                distribute_overhead(
3937                    &mut stream.metrics,
3938                    &mut bytes.overhead_bin,
3939                    &mut bytes.overhead_bout,
3940                    stream_bytes,
3941                    byte_totals,
3942                    streams.len(),
3943                    streams.len() == 1,
3944                );
3945                debug.push(DebugEvent::StreamEvent(4, global_stream_id));
3946                trace!(
3947                    "{} Recycle stream: {}",
3948                    log_module_context!(),
3949                    global_stream_id
3950                );
3951                let token = Self::complete_server_stream(stream, listener, client_rtt, server_rtt);
3952                Some((stream_id, token))
3953            }
3954        }
3955    }
3956
3957    /// Finalize a server-side stream after its response has been fully written.
3958    ///
3959    /// Generates an access log, resets metrics, and transitions the stream to `Recycle`.
3960    /// Returns the backend token if the stream was `Linked`, so the caller can call
3961    /// `endpoint.end_stream()` with the full `Context` (which can't be passed here
3962    /// because `stream` borrows from `context.streams`).
3963    ///
3964    /// Callers must distribute overhead *before* calling this, since the converter
3965    /// borrow may prevent `distribute_overhead()`.
3966    fn complete_server_stream<L>(
3967        stream: &mut crate::protocol::mux::Stream,
3968        listener: std::rc::Rc<std::cell::RefCell<L>>,
3969        client_rtt: Option<Duration>,
3970        server_rtt: Option<Duration>,
3971    ) -> Option<mio::Token>
3972    where
3973        L: ListenerHandler + L7ListenerHandler,
3974    {
3975        incr!(names::http::E2E_H2);
3976        stream.metrics.backend_stop();
3977        stream.generate_access_log(
3978            false,
3979            Some("H2::Complete"),
3980            listener,
3981            client_rtt,
3982            server_rtt,
3983        );
3984        stream.metrics.reset();
3985        let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3986        if let StreamState::Linked(token) = state {
3987            Some(token)
3988        } else {
3989            None
3990        }
3991    }
3992
3993    /// Compute the total bytes transferred across all active streams.
3994    ///
3995    /// Returns `(total_bytes_in, total_bytes_out)` where bytes_in = `bin + backend_bin`
3996    /// and bytes_out = `bout + backend_bout` for each stream.
3997    fn compute_stream_byte_totals<L: ListenerHandler + L7ListenerHandler>(
3998        &self,
3999        context: &Context<L>,
4000    ) -> (usize, usize) {
4001        let mut total_in = 0usize;
4002        let mut total_out = 0usize;
4003        for &gid in self.streams.values() {
4004            let m = &context.streams[gid].metrics;
4005            total_in += m.bin + m.backend_bin;
4006            total_out += m.bout + m.backend_bout;
4007        }
4008        (total_in, total_out)
4009    }
4010
4011    /// Distribute connection-level byte overhead proportionally to a single stream.
4012    ///
4013    /// `totals` should be pre-computed via [`compute_stream_byte_totals`] **before**
4014    /// taking a mutable borrow on the target stream, to avoid borrow conflicts.
4015    /// Delegates to the free function [`distribute_overhead`].
4016    fn distribute_overhead(&mut self, metrics: &mut SessionMetrics, totals: (usize, usize)) {
4017        let stream_bytes = (
4018            metrics.bin + metrics.backend_bin,
4019            metrics.bout + metrics.backend_bout,
4020        );
4021        distribute_overhead(
4022            metrics,
4023            &mut self.bytes.overhead_bin,
4024            &mut self.bytes.overhead_bout,
4025            stream_bytes,
4026            totals,
4027            self.streams.len(),
4028            self.streams.len() <= 1,
4029        );
4030    }
4031
4032    /// Attribute accumulated `zero_bytes_read` to the stream or to connection overhead.
4033    fn attribute_bytes_to_stream(&mut self, metrics: &mut SessionMetrics) {
4034        self.position
4035            .count_bytes_in(metrics, self.bytes.zero_bytes_read);
4036        self.bytes.zero_bytes_read = 0;
4037    }
4038
4039    fn attribute_bytes_to_overhead(&mut self) {
4040        self.bytes.overhead_bin += self.bytes.zero_bytes_read;
4041        self.bytes.zero_bytes_read = 0;
4042    }
4043
4044    /// Queue a WINDOW_UPDATE, coalescing with any existing entry for the same stream_id.
4045    /// RFC 9113 §6.9.1: window size increment MUST be 1..2^31-1 (0x7FFFFFFF).
4046    ///
4047    /// Always signals pending write so callers don't have to remember the
4048    /// edge-triggered epoll invariant (see memory feedback_epollet_signal_pending_write):
4049    /// under ET epoll a queued WINDOW_UPDATE without a live WRITABLE event bit
4050    /// is invisible to filter_interest() and will never get flushed.
4051    fn queue_window_update(&mut self, stream_id: u32, increment: u32) {
4052        let max_increment = i32::MAX as u32;
4053        if let Some(existing) = self.flow_control.pending_window_updates.get_mut(&stream_id) {
4054            let old = *existing;
4055            *existing = existing.saturating_add(increment).min(max_increment);
4056            // Coalescing invariant: the accumulated increment never decreases
4057            // and never exceeds i32::MAX (RFC 9113 §6.9 caps a WINDOW_UPDATE
4058            // increment at 2^31-1; emitting a larger value would be a protocol
4059            // error on the wire).
4060            debug_assert!(
4061                *existing >= old,
4062                "coalesced WINDOW_UPDATE increment must be monotonic non-decreasing"
4063            );
4064            debug_assert!(
4065                *existing <= max_increment,
4066                "coalesced WINDOW_UPDATE increment must stay within i32::MAX"
4067            );
4068            trace!(
4069                "{} WINDOW_UPDATE coalesced: stream={} old={} new={}",
4070                log_context!(self),
4071                stream_id,
4072                old,
4073                *existing
4074            );
4075        } else if self.flow_control.pending_window_updates.len() < self.max_pending_window_updates {
4076            self.flow_control
4077                .pending_window_updates
4078                .insert(stream_id, increment.min(max_increment));
4079            trace!(
4080                "{} WINDOW_UPDATE queued: stream={} increment={}",
4081                log_context!(self),
4082                stream_id,
4083                increment.min(max_increment)
4084            );
4085        } else {
4086            error!(
4087                "{} WINDOW_UPDATE dropped: queue full ({} entries), stream={} increment={}",
4088                log_context!(self),
4089                self.max_pending_window_updates,
4090                stream_id,
4091                increment
4092            );
4093            incr!(names::h2::WINDOW_UPDATE_DROPPED);
4094        }
4095        self.readiness.arm_writable();
4096    }
4097
4098    /// Re-enable READABLE if this connection is parked waiting for buffer space
4099    /// and the target stream's buffer now has enough room.
4100    ///
4101    /// This is the cross-readiness counterpart to the same-connection check in
4102    /// `writable()`. When the *other side* of a stream (frontend or backend)
4103    /// drains data via its own `writable()`, it frees buffer space that this
4104    /// connection was waiting for. Without this explicit wake-up the connection
4105    /// stays parked and the session deadlocks until a timeout fires.
4106    ///
4107    /// Returns `true` if READABLE was re-enabled.
4108    pub fn try_resume_reading<L>(&mut self, context: &Context<L>) -> bool
4109    where
4110        L: ListenerHandler + L7ListenerHandler,
4111    {
4112        if let Some((
4113            H2StreamId::Other {
4114                gid: global_stream_id,
4115                ..
4116            },
4117            amount,
4118        )) = self.expect_read
4119        {
4120            let stream = &context.streams[global_stream_id];
4121            let kawa = match self.position {
4122                Position::Client(..) => &stream.back,
4123                Position::Server => &stream.front,
4124            };
4125            if kawa.storage.available_space() >= amount {
4126                self.readiness.interest.insert(Ready::READABLE);
4127                return true;
4128            }
4129        }
4130        false
4131    }
4132
4133    /// Mark a stream's position-appropriate end-of-stream flag.
4134    ///
4135    /// Server reads from the front (client), so sets `front_received_end_of_stream`.
4136    /// Client reads from the back (backend), so sets `back_received_end_of_stream`.
4137    fn mark_end_of_stream(&self, stream: &mut crate::protocol::mux::Stream) {
4138        if self.position.is_server() {
4139            stream.front_received_end_of_stream = true;
4140        } else {
4141            stream.back_received_end_of_stream = true;
4142        }
4143    }
4144
    /// Cancel streams that have been idle longer than [`Self::stream_idle_timeout`].
    ///
    /// A stream is considered idle when no meaningful application data (non-empty
    /// DATA frames or HEADERS) has been received since the last activity timestamp
    /// in [`Self::stream_last_activity_at`]. A second guard,
    /// `stream_fc_stalled_since`, reaps on the same deadline; both are
    /// evaluated by `collect_timed_out_streams`.
    ///
    /// Mitigates slow-multiplex Slowloris (Pass 4 Medium #3): the connection-level
    /// idle timer resets on every frame, so a peer sending periodic control frames
    /// can pin `max_concurrent_streams` slots for the full nominal connection timeout.
    /// Per-stream idle deadlines guarantee each stream terminates if it stops making
    /// forward progress, regardless of connection-level liveness.
    ///
    /// Timed-out streams receive RST_STREAM(CANCEL) and are immediately removed
    /// from the streams map so they no longer count against MAX_CONCURRENT_STREAMS.
    /// A linked backend is notified through `endpoint.end_stream`; on a
    /// server-position connection the access log is generated with the reap
    /// reason and the stream moves to `StreamState::Recycle`.
    ///
    /// As a side task, every call first trims oversized per-connection
    /// scratch buffers (see the comment at the top of the body).
    pub fn cancel_timed_out_streams<E, L>(&mut self, context: &mut Context<L>, endpoint: &mut E)
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        // Per-connection scratch Vecs (`converter_buf`, `lowercase_buf`,
        // `cookie_buf`, `priorities_buf`) grow to a high-water mark and
        // never shrink on their own. On a long-lived H2 connection that
        // briefly carried a flurry of large headers, the backing memory
        // would stay pinned indefinitely. Each buffer is checked
        // independently: once its capacity exceeds 4× `SCRATCH_BUF_RETAIN`
        // it is shrunk back towards `SCRATCH_BUF_RETAIN`. This runs at the
        // top of every `cancel_timed_out_streams` invocation, before the
        // early returns below, so it happens whether or not any stream is
        // live or timed out.
        const SCRATCH_BUF_RETAIN: usize = 16 * 1024;
        if self.converter_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.converter_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.lowercase_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.lowercase_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.cookie_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.cookie_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.priorities_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.priorities_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }

        // Nothing to reap without streams, or when neither guard tracks
        // any stream.
        if self.streams.is_empty()
            || (self.stream_last_activity_at.is_empty() && self.stream_fc_stalled_since.is_empty())
        {
            return;
        }
        let now = Instant::now();
        let deadline = self.stream_idle_timeout;
        // Two independent per-stream guards reap on the same deadline — see
        // `collect_timed_out_streams`. The flow-control-stall guard
        // (`stream_fc_stalled_since`) closes the HTTP/2 window-stall vector that
        // the bidirectional liveness guard (`stream_last_activity_at`) misses,
        // because an inbound DATA drip keeps the liveness timer warm while the
        // response stays window-blocked.
        let timed_out = collect_timed_out_streams(
            &self.stream_last_activity_at,
            &self.stream_fc_stalled_since,
            &self.streams,
            &self.rst_sent,
            now,
            deadline,
        );
        if timed_out.is_empty() {
            return;
        }
        for (sid, reason) in timed_out {
            info!(
                "{} H2 stream {} exceeded {:?} ({}), cancelling",
                log_context!(self),
                sid,
                deadline,
                reason
            );
            // M1: break reaps down by guard so a window-stall reap (a DoS
            // mitigation) is distinguishable from an ordinary idle reap on a
            // dashboard. M2: a window-stall reap whose stream dribbled some
            // outbound progress (`acc > 0`) below the floor is specifically a
            // stall-budget reap — the `WINDOW_UPDATE`-drip vector the budget
            // closes — counted as a subset. Read the accumulator BEFORE
            // `remove_dead_stream` evicts it below.
            match reason {
                "H2::WindowStall" => {
                    count!(names::h2::STREAMS_REAPED_WINDOW_STALL, 1);
                    if matches!(self.stream_fc_stalled_progress.get(&sid), Some(&acc) if acc > 0) {
                        count!(names::h2::STREAMS_REAPED_STALL_BUDGET, 1);
                    }
                }
                "H2::IdleTimeout" => count!(names::h2::STREAMS_REAPED_IDLE_TIMEOUT, 1),
                other => debug!("{} unexpected reap reason {}", log_context!(self), other),
            }
            // Route through the canonical chokepoint so dedupe (rst_sent),
            // queued-cap accounting (MAX_PENDING_RST_STREAMS via
            // total_rst_streams_queued), and edge-triggered-epoll arming
            // (Readiness::arm_writable) all stay consistent — see LIFECYCLE
            // §8.2. Pushing onto `pending_rst_streams` directly would bypass
            // all three: a peer that opens 200 streams and lets them all idle
            // past stream_idle_timeout could exceed the queued cap silently
            // (no GOAWAY(ENHANCE_YOUR_CALM) escalation), a double-cancel pass
            // would grow pending_rst_streams instead of short-circuiting on
            // the existing rst_sent membership, and a hand-rolled
            // `interest.insert(WRITABLE) + signal_pending_write` pair would
            // skip invariant 15. Counting these RSTs against the cap is
            // deliberate: 200 cumulative idle cancellations from one peer IS
            // abusive (pinning MAX_CONCURRENT_STREAMS slots), and the
            // GOAWAY(ENHANCE_YOUR_CALM) escalation tells the peer to
            // reconnect with a clean state.
            //
            // We deliberately ignore the `Option<MuxResult>` flood-violation
            // signal here — `cancel_timed_out_streams` returns `()` and is
            // called as best-effort housekeeping during the read path. A
            // flood violation that becomes visible mid-iteration will be
            // re-detected on the next `record_rst_emitted` call (the
            // counter is sticky), so dropping the early-return is safe.
            let _ = self.enqueue_rst(sid, H2Error::Cancel);

            // Remove from streams map and recycle the context stream so the slot
            // no longer counts against MAX_CONCURRENT_STREAMS.
            // Totals are recomputed for every reaped stream, while it is
            // still registered in `self.streams` (matches RST_STREAM handler).
            let byte_totals = self.compute_stream_byte_totals(context);
            if let Some(global_stream_id) = self.streams.get(&sid).copied() {
                // Scoped so the mutable stream borrow ends before `context`
                // is used again below.
                {
                    let stream = &mut context.streams[global_stream_id];
                    self.attribute_bytes_to_stream(&mut stream.metrics);
                }
                // Check if stream is linked to a backend — borrow must be scoped
                // so end_stream can take &mut context.
                let linked_token = context.streams[global_stream_id].linked_token();
                // RTTs are snapshotted before `end_stream`, while the endpoint
                // can still resolve the linked token to a socket.
                let (client_rtt, server_rtt) =
                    Self::snapshot_rtts(&self.position, &self.socket, &*endpoint, linked_token);
                if let Some(token) = linked_token {
                    endpoint.end_stream(token, global_stream_id, context);
                }
                let stream = &mut context.streams[global_stream_id];
                match &self.position {
                    // Connected backend: release the request slot held by
                    // this stream.
                    Position::Client(_, backend, BackendStatus::Connected) => {
                        let mut backend_borrow = backend.borrow_mut();
                        backend_borrow.active_requests =
                            backend_borrow.active_requests.saturating_sub(1);
                    }
                    Position::Client(..) => {}
                    // Frontend: finalise metrics, log with the reap reason as
                    // the tag, and mark the stream for recycling.
                    Position::Server => {
                        self.distribute_overhead(&mut stream.metrics, byte_totals);
                        stream.metrics.backend_stop();
                        stream.generate_access_log(
                            true,
                            Some(reason),
                            context.listener.clone(),
                            client_rtt,
                            server_rtt,
                        );
                        stream.state = StreamState::Recycle;
                    }
                }
                // Retire sid from streams/prioriser/stream_last_activity_at and
                // invalidate expect_write/expect_read if they reference this gid.
                self.remove_dead_stream(sid, global_stream_id);
            }
        }
        // No explicit writable arming here: `enqueue_rst` already arms it
        // (via `arm_writable`) for every RST queued in the loop above.
    }
4311
4312    /// Queue a `RST_STREAM` frame for serialisation by
4313    /// [`Self::flush_pending_control_frames`] on the next writable tick.
4314    ///
4315    /// This is the canonical entry point for proxy-emitted stream resets:
4316    /// `DATA` on a closed stream, `MAX_CONCURRENT_STREAMS` refusal, and the
4317    /// per-stream error paths in [`Self::reset_stream`] all funnel through
4318    /// here. Serialisation is independent of the owning `Stream` still
4319    /// existing in `self.streams`, which is what lets us emit even after a
4320    /// caller has already called [`Self::remove_dead_stream`].
4321    ///
4322    /// Delegates the primitive work to [`enqueue_rst_into`] so the invariants
4323    /// are covered by unit tests that don't need a full `ConnectionH2`
4324    /// fixture. See that function's doc-comment for the three invariants
4325    /// (dedupe via `rst_sent`, MadeYouReset queued cap via
4326    /// `total_rst_streams_queued`, edge-triggered-epoll arm via
4327    /// [`Readiness::arm_writable`]).
4328    fn enqueue_rst(&mut self, wire_stream_id: StreamId, error: H2Error) -> Option<MuxResult> {
4329        let freshly_queued = enqueue_rst_into(
4330            &mut self.pending_rst_streams,
4331            &mut self.total_rst_streams_queued,
4332            &mut self.rst_sent,
4333            &mut self.readiness,
4334            wire_stream_id,
4335            error,
4336        );
4337        // Account ONLY when a new RST actually entered the queue.
4338        // Calling `enqueue_rst` for a stream that already has a queued
4339        // (or already-flushed) RST is the dedup short-circuit — counting
4340        // those would inflate `h2.frames.tx.rst_stream` /
4341        // `h2.rst_stream.sent.*` and trip the CVE-2025-8671 MadeYouReset
4342        // lifetime cap on frames that never reached the wire.
4343        //
4344        // Account at queue-time, not at drain-time. Doing it later in
4345        // `flush_pending_control_frames` would double-count any RST that
4346        // a re-entrant call (DATA on a closed stream we already RSTed)
4347        // tried to enqueue — and missing it at queue-time leaves
4348        // `cancel_timed_out_streams` / `refuse_stream_and_discard` /
4349        // DATA-on-closed-stream paths bypassing the lifetime cap
4350        // (security review LISA-001 on commit `da845c71`).
4351        if freshly_queued {
4352            self.account_emitted_rst(error)
4353        } else {
4354            None
4355        }
4356    }
4357
4358    /// Single accounting site for proxy-emitted RST_STREAM frames.
4359    /// Three things must happen for every emitted RST so flood-protection
4360    /// stays honest: the global tx counter, the per-error breakdown,
4361    /// and the MadeYouReset emitted-RST lifetime cap.
4362    ///
4363    /// Two distinct emission paths feed this helper:
4364    ///   * Queued frames — [`Self::enqueue_rst`] (and therefore every
4365    ///     callable that funnels through it: `reset_stream`,
4366    ///     `refuse_stream_and_discard`, `cancel_timed_out_streams`,
4367    ///     DATA-on-closed-stream) calls this once at queue-time. The
4368    ///     drain in `flush_pending_control_frames` does NOT call it
4369    ///     again — that would double-count.
4370    ///   * Converter-emitted frames — the converter's `initialize`
4371    ///     chokepoint (and the HPACK over-budget abort path) writes
4372    ///     RST_STREAM frames straight into `kawa.out` from inside
4373    ///     `kawa.prepare`. We collect those `H2Error` codes during the
4374    ///     `write_streams` loop and call this helper for each one
4375    ///     after `drop(converter)` (because the converter holds
4376    ///     `&mut self.encoder`).
4377    ///
4378    /// Returning `Some(MuxResult)` means the caller MUST short-circuit
4379    /// with that result — the flood detector tripped its lifetime cap
4380    /// and converted to a connection-wide GOAWAY.
4381    fn account_emitted_rst(&mut self, error: H2Error) -> Option<MuxResult> {
4382        incr!(names::h2::FRAMES_TX_RST_STREAM);
4383        count!(metric_for_rst_stream_sent(error), 1);
4384        if !matches!(error, H2Error::NoError) {
4385            if let Some(violation) = self.flood_detector.record_rst_emitted() {
4386                return Some(self.handle_flood_violation(violation));
4387            }
4388        }
4389        None
4390    }
4391
4392    /// Refuse a newly-opened stream with RST_STREAM and discard its HEADERS payload.
4393    ///
4394    /// Used when MAX_CONCURRENT_STREAMS is exceeded or buffer pool is exhausted.
4395    /// Queues the RST_STREAM for the writable path (can't write to kawa.storage
4396    /// here because it is needed to discard the HEADERS payload).
4397    ///
4398    /// Also applies SETTINGS back-pressure per RFC 9113 §5.1.2: if refusals
4399    /// burst past [`BACKPRESSURE_REFUSAL_THRESHOLD`] within
4400    /// [`BACKPRESSURE_WINDOW_DURATION`], the advertised
4401    /// `SETTINGS_MAX_CONCURRENT_STREAMS` is halved via
4402    /// [`Self::apply_mcs_backpressure`].
4403    fn refuse_stream_and_discard(
4404        &mut self,
4405        stream_id: StreamId,
4406        error: H2Error,
4407        payload_len: u32,
4408    ) -> MuxResult {
4409        if let Some(result) = self.enqueue_rst(stream_id, error) {
4410            return result;
4411        }
4412        self.state = H2State::Discard;
4413        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
4414        self.record_refusal_for_backpressure();
4415        MuxResult::Continue
4416    }
4417
4418    /// RFC 9113 §5.1.2 SETTINGS back-pressure bookkeeping.
4419    ///
4420    /// Increments the refusal counter for the current back-pressure window
4421    /// and, when the burst threshold is crossed, halves the advertised
4422    /// `SETTINGS_MAX_CONCURRENT_STREAMS`. Further halving attempts in the
4423    /// same connection are suppressed by [`Self::mcs_backpressure_applied`]
4424    /// so sustained abuse does not collapse the cap to zero — callers can
4425    /// still promote the situation to `EnhanceYourCalm` via the flood
4426    /// detector.
4427    fn record_refusal_for_backpressure(&mut self) {
4428        if self.refuse_window_start.elapsed() >= BACKPRESSURE_WINDOW_DURATION {
4429            self.refuse_count_window = 0;
4430            self.refuse_window_start = Instant::now();
4431        }
4432        self.refuse_count_window = self.refuse_count_window.saturating_add(1);
4433        if !self.mcs_backpressure_applied
4434            && self.refuse_count_window >= BACKPRESSURE_REFUSAL_THRESHOLD
4435        {
4436            self.apply_mcs_backpressure();
4437        }
4438    }
4439
4440    /// Halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` and mark the
4441    /// back-pressure state as applied. The new value takes effect locally
4442    /// immediately — subsequent stream-open checks in `handle_header_state`
4443    /// compare `self.streams.len()` against this reduced cap, so the peer
4444    /// starts receiving `REFUSED_STREAM` earlier. A full SETTINGS re-send on
4445    /// the wire is deferred until we have a mid-connection SETTINGS queue
4446    /// (the existing path in `handle_preface_state` only fires during the
4447    /// handshake); this is noted in the task log as a minimal first step.
4448    fn apply_mcs_backpressure(&mut self) {
4449        let previous = self.local_settings.settings_max_concurrent_streams;
4450        let reduced = (previous / 2).max(1);
4451        warn!(
4452            "{} H2 SETTINGS back-pressure: refusals={} in {}s — halving \
4453             SETTINGS_MAX_CONCURRENT_STREAMS {} -> {}",
4454            log_context!(self),
4455            self.refuse_count_window,
4456            BACKPRESSURE_WINDOW_DURATION.as_secs(),
4457            previous,
4458            reduced,
4459        );
4460        self.local_settings.settings_max_concurrent_streams = reduced;
4461        self.mcs_backpressure_applied = true;
4462    }
4463
4464    /// Log a flood violation with full session context and emit the GOAWAY.
4465    ///
4466    /// Centralises the "flood detected" reporting so every site that observes a
4467    /// [`H2FloodViolation`] gets the same session-scoped log line, matching the
4468    /// RUSTLS log-context convention. Also emits the per-kind statsd counter
4469    /// (`h2.flood.violation.<kind>`) so SOC dashboards can window the trip
4470    /// rate without parsing logs — every CVE-mitigation in the H2 family
4471    /// (Rapid Reset, MadeYouReset, CONTINUATION/PING/SETTINGS floods, header
4472    /// overflow, glitch) funnels through this site.
4473    pub fn handle_flood_violation(&mut self, violation: H2FloodViolation) -> MuxResult {
4474        count!(violation.metric_key, 1);
4475        warn!(
4476            "{} H2 flood detected: {} count {} exceeds threshold {}",
4477            log_context!(self),
4478            violation.reason,
4479            violation.count,
4480            violation.threshold,
4481        );
4482        self.goaway(violation.error)
4483    }
4484}
4485
4486/// Recover the [`H2Error`] code that the converter's `initialize`
4487/// chokepoint will encode into the synthesised RST_STREAM frame for a
4488/// kawa stuck in [`kawa::ParsingPhase::Error`]. Mirrors the parse +
4489/// fallback at `lib/src/protocol/mux/converter.rs::initialize` so the
4490/// flood-accounting helper sees the same code that lands on the wire.
4491fn rst_error_from_kawa<T: kawa::AsBuffer>(kawa: &kawa::Kawa<T>) -> H2Error {
4492    match kawa.parsing_phase {
4493        kawa::ParsingPhase::Error {
4494            kind: kawa::ParsingErrorKind::Processing { message },
4495            ..
4496        } => message.parse::<H2Error>().unwrap_or(H2Error::InternalError),
4497        _ => H2Error::InternalError,
4498    }
4499}
4500
/// Maps `(namespace literal, H2Error)` to a `&'static str` metric key of the
/// form `<namespace>.<error_name>`.
///
/// Each key is built with `concat!`, so it is a string literal: the metric
/// key never crosses through a heap allocation and the statsd drain can
/// store it as `&'static str`. The `match` has no catch-all arm, so adding a
/// new `H2Error` variant fails the build here and the metric breakdown stays
/// in lock-step with the RFC 9113 §7 codes.
///
/// Used for the per-error-code counters emitted around GOAWAY and RST_STREAM
/// in either direction (see `metric_for_goaway_sent` and its siblings).
macro_rules! h2_error_metric_key {
    ($namespace:literal, $code:expr) => {
        match $code {
            H2Error::NoError => concat!($namespace, ".no_error"),
            H2Error::ProtocolError => concat!($namespace, ".protocol_error"),
            H2Error::InternalError => concat!($namespace, ".internal_error"),
            H2Error::FlowControlError => concat!($namespace, ".flow_control_error"),
            H2Error::SettingsTimeout => concat!($namespace, ".settings_timeout"),
            H2Error::StreamClosed => concat!($namespace, ".stream_closed"),
            H2Error::FrameSizeError => concat!($namespace, ".frame_size_error"),
            H2Error::RefusedStream => concat!($namespace, ".refused_stream"),
            H2Error::Cancel => concat!($namespace, ".cancel"),
            H2Error::CompressionError => concat!($namespace, ".compression_error"),
            H2Error::ConnectError => concat!($namespace, ".connect_error"),
            H2Error::EnhanceYourCalm => concat!($namespace, ".enhance_your_calm"),
            H2Error::InadequateSecurity => concat!($namespace, ".inadequate_security"),
            H2Error::HTTP11Required => concat!($namespace, ".http_1_1_required"),
        }
    };
}
4530
/// Static metric key for an outbound GOAWAY carrying `error`.
///
/// Expands [`h2_error_metric_key!`] under the `h2.goaway.sent` namespace, so
/// the result is `h2.goaway.sent.<error_name>` (for example
/// `h2.goaway.sent.no_error`). It has the same call shape as the other three
/// per-direction helpers, which keeps the call sites uniform.
fn metric_for_goaway_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.goaway.sent", error)
}
4536
4537/// Static metric key for an inbound GOAWAY by raw wire error code. Codes
4538/// outside RFC 9113 §7 fall into the dedicated `…unknown_error` bucket so the
4539/// breakdown stays bounded and operators can still spot non-standard peers.
4540fn metric_for_goaway_received(error_code: u32) -> &'static str {
4541    H2Error::try_from(error_code)
4542        .map(|e| h2_error_metric_key!("h2.goaway.received", e))
4543        .unwrap_or("h2.goaway.received.unknown_error")
4544}
4545
/// Static metric key for an outbound RST_STREAM carrying `error`.
///
/// Expands [`h2_error_metric_key!`] under the `h2.rst_stream.sent`
/// namespace, giving `h2.rst_stream.sent.<error_name>`. It mirrors
/// [`metric_for_goaway_sent`] under a separate namespace so RST and GOAWAY
/// rates can be alerted on independently.
fn metric_for_rst_stream_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.rst_stream.sent", error)
}
4552
4553/// Static metric key for an inbound RST_STREAM by raw wire error code. Same
4554/// `…unknown_error` fallback as [`metric_for_goaway_received`].
4555fn metric_for_rst_stream_received(error_code: u32) -> &'static str {
4556    H2Error::try_from(error_code)
4557        .map(|e| h2_error_metric_key!("h2.rst_stream.received", e))
4558        .unwrap_or("h2.rst_stream.received.unknown_error")
4559}
4560
4561/// Static metric key for an inbound H2 frame by RFC 9113 §6 frame type.
4562/// Emitted at the `handle_frame` dispatch — single chokepoint that any
4563/// new H2 frame type must traverse, so adding a `Frame::*` variant fails
4564/// the build here. Counts are per-frame, not per-byte; pair with
4565/// `bytes_in` for traffic-mix dashboards.
4566fn h2_frame_rx_metric_key(frame: &Frame) -> &'static str {
4567    match frame {
4568        Frame::Data(_) => "h2.frames.rx.data",
4569        Frame::Headers(_) => "h2.frames.rx.headers",
4570        Frame::PushPromise(_) => "h2.frames.rx.push_promise",
4571        Frame::Priority(_) => "h2.frames.rx.priority",
4572        Frame::RstStream(_) => "h2.frames.rx.rst_stream",
4573        Frame::Settings(_) => "h2.frames.rx.settings",
4574        Frame::Ping(_) => "h2.frames.rx.ping",
4575        Frame::GoAway(_) => "h2.frames.rx.goaway",
4576        Frame::WindowUpdate(_) => "h2.frames.rx.window_update",
4577        Frame::Continuation(_) => "h2.frames.rx.continuation",
4578        Frame::PriorityUpdate(_) => "h2.frames.rx.priority_update",
4579        Frame::Unknown(_) => "h2.frames.rx.unknown",
4580    }
4581}
4582
4583impl<Front: SocketHandler> ConnectionH2<Front> {
4584    pub fn goaway(&mut self, error: H2Error) -> MuxResult {
4585        self.state = H2State::Error;
4586        self.drain.draining = true;
4587        self.expect_read = None;
4588        // Disarm the SETTINGS ACK timer: once we've committed to GOAWAY, the
4589        // timeout check at `readable()` / `flush_pending_control_frames()` must
4590        // not re-fire. Without this, `signal_pending_write()` below re-enters
4591        // `writable()` → `flush_pending_control_frames()` on the next tick,
4592        // the elapsed check is still true, and we emit another
4593        // `warn!` + `goaway()` pair, each bumping `h2.goaway.sent.*`.
4594        self.settings_sent_at = None;
4595        let kawa = &mut self.zero;
4596        kawa.storage.clear();
4597        // Severity tiering: only `InternalError` implies a sozu-side bug when
4598        // WE emit it. Every other non-`NoError` reason is "peer misbehaved,
4599        // sozu defended correctly" — operators don't need paging on abusive
4600        // or buggy peers. Caller sites already log the specific antecedent
4601        // (flood detected, parser failure, SETTINGS timeout, invalid window)
4602        // before reaching `goaway()`, so demoting this summary line avoids
4603        // duplicate noise without hiding the root cause.
4604        match error {
4605            H2Error::NoError => debug!("{} GOAWAY: {:?}", log_context!(self), error),
4606            H2Error::InternalError => error!("{} GOAWAY: {:?}", log_context!(self), error),
4607            _ => warn!("{} GOAWAY: {:?}", log_context!(self), error),
4608        }
4609        count!(metric_for_goaway_sent(error), 1);
4610
4611        // RFC 9113 §6.8: last_stream_id is the highest peer-initiated stream we processed
4612        match serializer::gen_goaway(kawa.storage.space(), self.highest_peer_stream_id, error) {
4613            Ok((_, size)) => {
4614                kawa.storage.fill(size);
4615                incr!(names::h2::FRAMES_TX_GOAWAY);
4616                self.state = H2State::GoAway;
4617                self.expect_write = Some(H2StreamId::Zero);
4618                self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
4619                self.readiness.signal_pending_write();
4620                MuxResult::Continue
4621            }
4622            Err(error) => {
4623                error!(
4624                    "{} Could not serialize GoAwayFrame: {:?}",
4625                    log_context!(self),
4626                    error
4627                );
4628                self.force_disconnect()
4629            }
4630        }
4631    }
4632
4633    /// RFC 9113 §6.8: Initiate graceful shutdown using the double-GOAWAY pattern.
4634    ///
4635    /// First call sends GOAWAY with `last_stream_id = 0x7FFFFFFF` (MAX) to signal
4636    /// the intent to stop accepting new streams while allowing in-flight streams
4637    /// to complete. The connection enters draining mode.
4638    ///
4639    /// When `draining` is already true (second invocation), sends the final GOAWAY
4640    /// with the actual `highest_peer_stream_id` so the peer knows which streams
4641    /// were processed.
4642    pub fn graceful_goaway(&mut self) -> MuxResult {
4643        if self.drain.draining {
4644            // Second GOAWAY: send with the real last_stream_id
4645            return self.goaway(H2Error::NoError);
4646        }
4647
4648        // First GOAWAY: advertise MAX stream ID so the peer knows we are draining
4649        // but does not yet know the cutoff. This gives in-flight requests a chance
4650        // to arrive before we commit to a final last_stream_id.
4651        self.drain.draining = true;
4652        // Arm the forced-close timer from the moment the proxy decides to drain.
4653        // `Mux::shutting_down` samples it against `graceful_shutdown_deadline`
4654        // and returns `true` once the budget is exhausted so the session loop
4655        // tears the connection down instead of waiting forever.
4656        self.drain.started_at = Some(Instant::now());
4657        // Keep expect_read as-is: existing streams should continue reading
4658        // data during the drain window opened by the initial GOAWAY. Only
4659        // the final GOAWAY (via `goaway()`) removes READABLE.
4660        let kawa = &mut self.zero;
4661        kawa.storage.clear();
4662        debug!(
4663            "{} GOAWAY (graceful, initial): last_stream_id=0x7FFFFFFF",
4664            log_context!(self)
4665        );
4666        // The initial GOAWAY sends NO_ERROR on the wire — count it under
4667        // the same per-code key as the final GOAWAY. The downstream alert
4668        // that wants to distinguish drain from termination compares
4669        // against the `h2.goaway.sent.no_error` rate (drain) vs the other
4670        // variants (termination on error).
4671        count!(metric_for_goaway_sent(H2Error::NoError), 1);
4672
4673        match serializer::gen_goaway(kawa.storage.space(), STREAM_ID_MAX, H2Error::NoError) {
4674            Ok((_, size)) => {
4675                kawa.storage.fill(size);
4676                incr!(names::h2::FRAMES_TX_GOAWAY);
4677                // Stay in the current state so the connection can continue processing
4678                // existing streams. The final GOAWAY will transition to GoAway state.
4679                // Keep READABLE so in-flight request bodies can still be received
4680                // during the drain window. Only remove READABLE in the final GOAWAY
4681                // (via `goaway()`).
4682                self.expect_write = Some(H2StreamId::Zero);
4683                self.readiness.arm_writable();
4684                MuxResult::Continue
4685            }
4686            Err(error) => {
4687                error!(
4688                    "{} Could not serialize graceful GoAwayFrame: {:?}",
4689                    log_context!(self),
4690                    error
4691                );
4692                self.force_disconnect()
4693            }
4694        }
4695    }
4696
4697    /// Returns `true` when the graceful-shutdown budget armed by
4698    /// [`Self::graceful_goaway`] has elapsed. A return of `true` signals
4699    /// the enclosing session loop that the proxy-initiated drain must
4700    /// transition to a forced close: remaining streams will not complete
4701    /// in time and keeping the connection open past the deadline defeats
4702    /// the soft-stop SLA.
4703    ///
4704    /// Returns `false` when:
4705    /// - drain has not started yet (`started_at` is `None`),
4706    /// - the knob is `0` / `None` (indefinite wait explicitly opted in),
4707    /// - or the elapsed time is still within the configured budget.
4708    pub fn graceful_shutdown_deadline_elapsed(&self) -> bool {
4709        match (self.drain.started_at, self.drain.graceful_shutdown_deadline) {
4710            (Some(started_at), Some(deadline)) => started_at.elapsed() >= deadline,
4711            _ => false,
4712        }
4713    }
4714
4715    /// Returns `true` if there is data queued waiting to be flushed:
4716    /// - H2 control frames in the zero buffer (GOAWAY, SETTINGS ACK, etc.)
4717    /// - A partially-written stream or control frame (`expect_write`)
4718    /// - Encrypted TLS records in rustls's output buffer not yet flushed to TCP
4719    ///
4720    /// The TLS check is critical: `shutting_down()` uses this to prevent
4721    /// premature session close while response DATA is still in rustls's
4722    /// buffer (accepted by `socket_write_vectored` but not yet on the wire).
4723    ///
4724    /// Does NOT check per-stream `back.out`/`back.blocks`; use
4725    /// [`Self::has_pending_write_full`] on paths that must honour
4726    /// LIFECYCLE invariant 16 (e.g. shutdown-drain).
4727    pub fn has_pending_write(&self) -> bool {
4728        if self.peer_gone_after_final_goaway() {
4729            return false;
4730        }
4731        self.expect_write.is_some()
4732            || !self.zero.storage.is_empty()
4733            || self.socket.socket_wants_write()
4734    }
4735
    /// Returns `true` while `pending_rst_streams` is non-empty, i.e. the
    /// reaper has queued control frames (`RST_STREAM`) that have not yet
    /// been serialized.
    ///
    /// Kept SEPARATE from [`Self::has_pending_write`]: that probe gates
    /// connection close (the `mod.rs` close-gating sites) and must NOT treat
    /// a queued RST as a reason to keep the connection open. This probe is
    /// consulted ONLY by the `MuxState::timeout` flush gate, to push a
    /// silent-peer `RST_STREAM(CANCEL)` onto the wire before the connection
    /// closes.
    pub fn has_pending_control_write(&self) -> bool {
        !self.pending_rst_streams.is_empty()
    }
4746
4747    /// Connection-level [`Self::has_pending_write`] extended with a per-stream
4748    /// back-buffer probe (LIFECYCLE §9 invariant 16). Used by shutdown-drain
4749    /// paths that must not close while any open stream still has outbound
4750    /// kawa bytes queued — a voluntary scheduler yield can leave `back.out`
4751    /// or `back.blocks` non-empty without `expect_write` being set.
4752    pub fn has_pending_write_full<L>(&self, context: &Context<L>) -> bool
4753    where
4754        L: ListenerHandler + L7ListenerHandler,
4755    {
4756        self.has_pending_write() || any_stream_has_pending_back(&self.streams, &context.streams)
4757    }
4758
4759    /// Flush the zero buffer to the socket, counting bytes as connection overhead.
4760    ///
4761    /// Returns `true` if the socket stalled (WouldBlock / zero-length write),
4762    /// meaning the caller should stop writing and wait for the next writable event.
4763    /// Returns `false` when the buffer has been fully drained.
4764    fn flush_zero_to_socket(&mut self) -> bool {
4765        while !self.zero.storage.is_empty() {
4766            let (size, status) = self.socket.socket_write(self.zero.storage.data());
4767            #[cfg(debug_assertions)]
4768            trace!(
4769                "{} flush_zero_to_socket: written={}, status={:?}, wants_write={}",
4770                log_context!(self),
4771                size,
4772                status,
4773                self.socket.socket_wants_write()
4774            );
4775            self.zero.storage.consume(size);
4776            self.position.count_bytes_out_counter(size);
4777            self.bytes.overhead_bout += size;
4778            if update_readiness_after_write(size, status, &mut self.readiness) {
4779                return true;
4780            }
4781        }
4782        // Reset buffer positions after draining. consume() advances start but
4783        // never resets it, so without clear() the next fill would panic.
4784        self.zero.storage.clear();
4785        false
4786    }
4787
4788    /// Directly flush the zero buffer to the socket without going through
4789    /// the full writable() path. Used during shutdown when the event loop
4790    /// won't deliver new epoll events for this session (edge-triggered).
4791    pub fn flush_zero_buffer(&mut self) {
4792        if self.flush_zero_to_socket() {
4793            return;
4794        }
4795        self.expect_write = None;
4796        if self.socket.socket_wants_write() {
4797            let (_size, status) = self.socket.socket_write(&[]);
4798            let _ = update_readiness_after_write(0, status, &mut self.readiness);
4799        }
4800    }
4801
4802    pub fn create_stream<L>(
4803        &mut self,
4804        stream_id: StreamId,
4805        context: &mut Context<L>,
4806    ) -> Option<GlobalStreamId>
4807    where
4808        L: ListenerHandler + L7ListenerHandler,
4809    {
4810        // RFC 9113 §6.8: reject new streams on a draining connection
4811        if self.drain.draining {
4812            error!(
4813                "{} Rejecting new stream {} on draining connection",
4814                log_context!(self),
4815                stream_id
4816            );
4817            return None;
4818        }
4819        let highest_before = self.highest_peer_stream_id;
4820        let streams_before = self.streams.len();
4821        // Track the highest peer-initiated stream ID for GoAway frames
4822        // before any early return, so GoAway always reports the correct last stream.
4823        if stream_id > self.highest_peer_stream_id {
4824            self.highest_peer_stream_id = stream_id;
4825        }
4826        // highest_peer_stream_id is monotonic non-decreasing — it only ever
4827        // climbs to the largest id we have accepted (RFC 9113 §6.8 last-stream
4828        // reporting depends on this).
4829        debug_assert!(
4830            self.highest_peer_stream_id >= highest_before,
4831            "highest_peer_stream_id must never regress"
4832        );
4833        let global_stream_id = context.create_stream(
4834            Ulid::generate(),
4835            self.peer_settings.settings_initial_window_size,
4836        )?;
4837        self.last_stream_id = (stream_id + 2) & !1;
4838        self.streams.insert(stream_id, global_stream_id);
4839        self.stream_last_activity_at
4840            .insert(stream_id, Instant::now());
4841        // Post-conditions: the stream is now reachable in both indices, the
4842        // active count grew by exactly one (the id was not already present —
4843        // `handle_header_state` rejects re-used ids), and `last_stream_id` is
4844        // the even watermark just past this id so `new_stream_id` never collides.
4845        debug_assert_eq!(
4846            self.streams.get(&stream_id).copied(),
4847            Some(global_stream_id),
4848            "create_stream must register the wire->global mapping"
4849        );
4850        debug_assert!(
4851            self.stream_last_activity_at.contains_key(&stream_id),
4852            "create_stream must arm the per-stream idle timer"
4853        );
4854        debug_assert_eq!(
4855            self.streams.len(),
4856            streams_before + 1,
4857            "create_stream must add exactly one stream (id must not pre-exist)"
4858        );
4859        debug_assert!(
4860            self.last_stream_id > stream_id && self.last_stream_id & 1 == 0,
4861            "last_stream_id watermark must be the even value strictly above stream_id"
4862        );
4863        Some(global_stream_id)
4864    }
4865
4866    pub fn new_stream_id(&mut self) -> Option<StreamId> {
4867        let watermark_before = self.last_stream_id;
4868        let (issued, next) = next_stream_id(self.last_stream_id, self.position.is_client())?;
4869        self.last_stream_id = next;
4870        // Post-conditions: the locally-issued id has the parity of our role and
4871        // the watermark advanced strictly (so the next allocation cannot reuse
4872        // this id). `next_stream_id` already asserts parity vs `is_client`; here
4873        // we re-assert against `self.position` and the watermark monotonicity.
4874        debug_assert_eq!(
4875            issued & 1 == 1,
4876            self.position.is_client(),
4877            "locally-issued stream id parity must match our role"
4878        );
4879        debug_assert!(
4880            self.last_stream_id > watermark_before,
4881            "issuing a stream id must advance the watermark"
4882        );
4883        Some(issued)
4884    }
4885
    /// Test-only setter: overwrite `last_stream_id` with `id`, typically a
    /// value close to [`STREAM_ID_MAX`], so that the next call to
    /// [`Self::new_stream_id`] exhausts the 31-bit space.
    ///
    /// FIX-22 ("Stream-ID exhaustion disconnects backend gracefully")
    /// exercises the `None`-return branch. Reaching it through normal API
    /// usage would require issuing ~2³¹ requests, which is not tractable in
    /// an E2E harness.
    ///
    /// Compiled only for unit tests or with the `e2e-hooks` feature. The
    /// value is stored as-is, with no parity or range check.
    #[cfg(any(test, feature = "e2e-hooks"))]
    pub fn __test_set_last_stream_id(&mut self, id: StreamId) {
        self.last_stream_id = id;
    }
4896
4897    /// Cross-field invariant sweep for the H2 connection state machine,
4898    /// asserted as a run-to-completion post-condition at the end of every
4899    /// frame-handling pass (see the call in [`Self::handle_frame`]).
4900    ///
4901    /// These are relationships between *separate* fields that no single setter
4902    /// can guarantee on its own — exactly the class of bug TigerStyle's
4903    /// `check_invariants` targets. Each one is cheap (counter compares + a few
4904    /// `HashMap` membership probes); the whole function is `#[cfg(debug_assertions)]`
4905    /// and compiles out of release entirely.
4906    ///
4907    /// Encoded invariants:
4908    /// 1. **Stream-id watermark parity**: locally-issued ids never exceed
4909    ///    `STREAM_ID_MAX`; `last_stream_id` stays the even watermark (it is
4910    ///    rounded to `(id + 2) & !1` and initialised to 0).
4911    /// 2. **Per-stream caches are subsets of the live stream set**:
4912    ///    `stream_last_activity_at` is keyed only by currently-tracked stream
4913    ///    ids — a leak here would let a removed stream keep an idle timer and
4914    ///    mis-fire `cancel_timed_out_streams`. (`rst_sent` is intentionally NOT
4915    ///    a subset: a queued RST for an already-removed stream is legal.)
4916    /// 3. **RST queue accounting**: the never-decaying `total_rst_streams_queued`
4917    ///    lifetime counter is always `>=` the currently-pending queue length
4918    ///    (CVE-2025-8671 MadeYouReset cap relies on the lifetime counter never
4919    ///    under-counting), and the pending queue stays within its hard cap +1
4920    ///    (the escalation tripwire fires at the cap).
4921    /// 4. **Pending WINDOW_UPDATE bound**: the coalescing map never exceeds the
4922    ///    per-connection cap derived from `max_concurrent_streams`.
4923    /// 5. **Drain/state coupling**: a terminal `GoAway`/`Error` state implies the
4924    ///    connection is draining (`goaway()` sets both); the converse need not
4925    ///    hold (graceful drain stays in a live state).
4926    #[cfg(debug_assertions)]
4927    fn check_invariants<L>(&self, context: &Context<L>)
4928    where
4929        L: ListenerHandler + L7ListenerHandler,
4930    {
4931        // (1) Watermark parity and bound.
4932        debug_assert!(
4933            self.last_stream_id & 1 == 0,
4934            "last_stream_id must stay an even watermark, got {}",
4935            self.last_stream_id
4936        );
4937
4938        // (2) Per-stream caches are subsets of the live stream set, and every
4939        // mapping points at a valid context slot.
4940        debug_assert!(
4941            self.stream_last_activity_at
4942                .keys()
4943                .all(|id| self.streams.contains_key(id)),
4944            "stream_last_activity_at must only track currently-open stream ids"
4945        );
4946        debug_assert!(
4947            self.streams
4948                .values()
4949                .all(|&gid| gid < context.streams.len()),
4950            "every stream mapping must point at a valid context slot"
4951        );
4952
4953        // (3) RST queue accounting.
4954        debug_assert!(
4955            self.total_rst_streams_queued >= self.pending_rst_streams.len(),
4956            "queued-RST lifetime counter ({}) must be >= currently-pending queue ({})",
4957            self.total_rst_streams_queued,
4958            self.pending_rst_streams.len()
4959        );
4960        debug_assert!(
4961            self.pending_rst_streams.len() <= MAX_PENDING_RST_STREAMS + 1,
4962            "pending RST queue must stay within its hard cap (escalates at the cap)"
4963        );
4964
4965        // (4) Pending WINDOW_UPDATE coalescing map bound.
4966        debug_assert!(
4967            self.flow_control.pending_window_updates.len() <= self.max_pending_window_updates,
4968            "pending WINDOW_UPDATE map must stay within its per-connection cap"
4969        );
4970
4971        // (5) Drain/state coupling: terminal states imply draining.
4972        debug_assert!(
4973            !matches!(self.state, H2State::GoAway | H2State::Error) || self.drain.draining,
4974            "GoAway/Error state must imply the connection is draining"
4975        );
4976    }
4977
4978    fn handle_frame<E, L>(
4979        &mut self,
4980        frame: Frame,
4981        wire_payload_len: u32,
4982        context: &mut Context<L>,
4983        endpoint: E,
4984    ) -> MuxResult
4985    where
4986        E: Endpoint,
4987        L: ListenerHandler + L7ListenerHandler,
4988    {
4989        trace!("{} {:#?}", log_context!(self), frame);
4990        // Per-frame-type RX counter. Single chokepoint covers every H2 frame
4991        // type — adding a new `Frame::*` variant fails the build inside the
4992        // helper, keeping the metric breakdown in lock-step with RFC 9113 §6.
4993        count!(h2_frame_rx_metric_key(&frame), 1);
4994        let result = match frame {
4995            Frame::Data(data) => self.handle_data_frame(data, wire_payload_len, context, endpoint),
4996            Frame::Headers(headers) => self.handle_headers_frame(headers, context, endpoint),
4997            Frame::PushPromise(_) => self.handle_push_promise_frame(),
4998            Frame::Priority(priority) => self.handle_priority_frame(priority, context, endpoint),
4999            Frame::RstStream(rst_stream) => {
5000                self.handle_rst_stream_frame(rst_stream, context, endpoint)
5001            }
5002            Frame::Settings(settings) => self.handle_settings_frame(settings, context),
5003            Frame::Ping(ping) => self.handle_ping_frame(ping),
5004            Frame::GoAway(goaway) => self.handle_goaway_frame(goaway, context, endpoint),
5005            Frame::WindowUpdate(wu) => self.handle_window_update_frame(wu, context, endpoint),
5006            Frame::PriorityUpdate(pu) => self.handle_priority_update_frame(pu),
5007            Frame::Continuation(_) => {
5008                // Unreachable: standalone CONTINUATION is rejected in
5009                // `handle_header_state` (RFC 9113 §6.10) and in-block
5010                // CONTINUATION is consumed by the inline header-parsing
5011                // path. Keep a defensive fallback that returns
5012                // PROTOCOL_ERROR rather than panicking in debug builds.
5013                self.attribute_bytes_to_overhead();
5014                warn!(
5015                    "{} CONTINUATION frames are handled inline during header parsing",
5016                    log_context!(self)
5017                );
5018                self.goaway(H2Error::ProtocolError)
5019            }
5020            // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
5021            // The parser already consumed the payload; attribute the bytes
5022            // to connection-level overhead and continue.
5023            Frame::Unknown(raw) => {
5024                debug!(
5025                    "{} Ignoring unknown H2 frame type {}",
5026                    log_context!(self),
5027                    raw
5028                );
5029                self.attribute_bytes_to_overhead();
5030                MuxResult::Continue
5031            }
5032        };
5033        // Run-to-completion post-condition: the connection-level cross-field
5034        // invariants must hold after every frame is dispatched, on success and
5035        // on the protocol-error paths alike.
5036        #[cfg(debug_assertions)]
5037        self.check_invariants(context);
5038        result
5039    }
5040
5041    /// RFC 9110 §8.6: Content-Length validation must be skipped for responses
5042    /// where the body is absent by definition:
5043    /// - Responses to HEAD requests (any status)
5044    /// - 1xx informational responses
5045    /// - 204 No Content
5046    /// - 304 Not Modified
5047    fn content_length_exempt(
5048        &self,
5049        context: &crate::protocol::kawa_h1::editor::HttpContext,
5050    ) -> bool {
5051        use crate::protocol::kawa_h1::parser::Method;
5052        // HEAD method responses (only relevant when reading backend responses)
5053        if self.position.is_client() && context.method == Some(Method::Head) {
5054            return true;
5055        }
5056        // 1xx, 204, 304 status codes
5057        if let Some(status) = context.status {
5058            if (100..200).contains(&status) || status == 204 || status == 304 {
5059                return true;
5060            }
5061        }
5062        false
5063    }
5064
    /// Processes one inbound DATA frame addressed to `data.stream_id`.
    ///
    /// `data.payload` is the unpadded application payload; `wire_payload_len`
    /// is the full frame payload length and is the value used for every
    /// flow-control credit in this method.
    ///
    /// Steps, in order:
    /// 1. An empty frame without END_STREAM bumps the flood detector's
    ///    `empty_data_count` and runs `check_flood_or_return!`.
    /// 2. If the stream id is not in `self.streams`, the connection-level
    ///    window is credited, the bytes are attributed to overhead and the
    ///    method returns `MuxResult::Continue`.
    /// 3. Otherwise the stream's `data_received` counter is advanced by the
    ///    unpadded length and the connection-level window is credited.
    /// 4. Unless `content_length_exempt` applies, a running total above a
    ///    declared Content-Length resets the stream with `PROTOCOL_ERROR`.
    /// 5. A stream-level WINDOW_UPDATE is queued (unless END_STREAM), writable
    ///    is armed when updates are pending, and the per-stream idle timer is
    ///    refreshed for non-empty payloads.
    /// 6. For an `Unlinked` stream the buffered payload is discarded;
    ///    otherwise the payload is pushed into the receive buffer as kawa
    ///    blocks, the END_STREAM Content-Length equality is enforced, and the
    ///    linked peer (if any) is armed writable.
    ///
    /// Returns the result of `reset_stream` on a Content-Length violation,
    /// `MuxResult::Continue` otherwise (flood handling aside).
    fn handle_data_frame<E, L>(
        &mut self,
        data: parser::Data,
        wire_payload_len: u32,
        context: &mut Context<L>,
        mut endpoint: E,
    ) -> MuxResult
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        // CVE-2019-9518: track empty DATA frames (no payload, no END_STREAM)
        if data.payload.is_empty() && !data.end_stream {
            let empty_before = self.flood_detector.empty_data_count;
            self.flood_detector.empty_data_count += 1;
            debug_assert_eq!(
                self.flood_detector.empty_data_count,
                empty_before + 1,
                "empty-DATA flood counter must advance by exactly one per empty frame"
            );
            check_flood_or_return!(self);
        }
        let Some(global_stream_id) = self.streams.get(&data.stream_id).copied() else {
            // The stream was terminated while data was expected,
            // probably due to automatic answer for invalid/unauthorized access.
            // RFC 9113 §6.9: we MUST still account for the DATA payload in
            // connection-level flow control using the full wire length
            // (including pad-length byte and padding), otherwise the window
            // shrinks permanently and eventually stalls the connection.
            self.flow_control.received_bytes_since_update += wire_payload_len;
            // A connection-level WINDOW_UPDATE is only queued once at least
            // half of the configured initial connection window was consumed.
            let conn_threshold = self.connection_config.initial_connection_window / 2;
            if self.flow_control.received_bytes_since_update >= conn_threshold {
                let increment = self.flow_control.received_bytes_since_update;
                self.queue_window_update(0, increment);
                self.flow_control.received_bytes_since_update = 0;
                self.readiness.arm_writable();
            }
            self.attribute_bytes_to_overhead();
            return MuxResult::Continue;
        };
        let mut slice = data.payload;
        let stream = &mut context.streams[global_stream_id];
        // Unpadded application payload size — what is forwarded to the backend
        // and counted against Content-Length.
        let content_len = slice.len();
        // Full wire-payload size (includes pad-length byte and padding).
        // RFC 9113 §5.2: padding counts against flow-control windows.
        let wire_len = wire_payload_len as usize;
        // Evaluated once, before any buffer mutation; reused by both the
        // per-frame and the END_STREAM Content-Length checks below.
        let cl_exempt = self.content_length_exempt(&stream.context);

        // Extract declared content-length and update position-aware data counter
        let (data_received, declared_length) = {
            let parts = stream.split(&self.position);
            *parts.data_received += content_len;
            let total = *parts.data_received;
            let declared = match parts.rbuffer.body_size {
                kawa::BodySize::Length(n) => Some(n),
                _ => None,
            };
            (total, declared)
        };

        // RFC 9113 §6.9 + §5.2: credit connection-level flow control BEFORE any
        // early-return path. Malformed DATA still consumed the peer's send
        // window; without crediting it back, repeated bad streams permanently
        // shrink the connection window and stall unrelated streams that share
        // the same H2 connection. Stream-level credit can stay below — once we
        // RST the violating stream, its per-stream window is moot per
        // RFC 9113 §6.9 (the receiver discards further frames for the stream).
        let conn_threshold = self.connection_config.initial_connection_window / 2;
        self.flow_control.received_bytes_since_update += wire_payload_len;
        if self.flow_control.received_bytes_since_update >= conn_threshold {
            let increment = self.flow_control.received_bytes_since_update;
            self.queue_window_update(0, increment);
            self.flow_control.received_bytes_since_update = 0;
        }

        // RFC 9113 §8.1.1: if Content-Length is present, total DATA payload
        // must not exceed the declared length (check on every frame).
        // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses (body absent by definition).
        if !cl_exempt {
            if let Some(expected) = declared_length {
                if data_received > expected {
                    error!(
                        "{} Content-Length mismatch: received {} > declared {}",
                        log_context!(self),
                        data_received,
                        expected
                    );
                    // Pair WRITABLE arming with the queued connection-level
                    // WINDOW_UPDATE before returning; otherwise the credit sits
                    // until the next inbound frame on this connection.
                    if !self.flow_control.pending_window_updates.is_empty() {
                        self.readiness.arm_writable();
                    }
                    let result = self.reset_stream(
                        data.stream_id,
                        global_stream_id,
                        context,
                        endpoint,
                        H2Error::ProtocolError,
                    );
                    self.remove_dead_stream(data.stream_id, global_stream_id);
                    return result;
                }
            }
        }

        // Re-borrow the stream: the earlier borrow ended before the
        // Content-Length check, which may hand `context` to `reset_stream`.
        let stream = &mut context.streams[global_stream_id];
        self.attribute_bytes_to_stream(&mut stream.metrics);
        // Copy the state out now; `stream` is split into its parts right after.
        let stream_state = stream.state;
        let is_unlinked = matches!(stream_state, StreamState::Unlinked);
        let parts = stream.split(&self.position);
        let kawa = parts.rbuffer;
        self.position.count_bytes_in(parts.metrics, content_len);

        // Stream-level flow control (only if stream is still open).
        // Connection-level credit was already applied above the CL check so
        // malformed DATA cannot starve the connection window for other streams.
        if !data.end_stream {
            self.queue_window_update(data.stream_id, wire_payload_len);
        }

        // If we have pending updates, ensure we get a writable event.
        // Inserting the interest alone is not enough: under edge-triggered
        // epoll the WRITABLE event bit may have been consumed by a previous
        // write cycle. Without the event bit set, filter_interest() returns 0
        // and the WINDOW_UPDATEs never get flushed, stalling the client.
        // NOTE(review): the code calls `arm_writable()`; presumably it also
        // signals the pending write (older wording here named
        // signal_pending_write()) — confirm against its definition.
        if !self.flow_control.pending_window_updates.is_empty() {
            self.readiness.arm_writable();
        }

        // Refresh per-stream idle timer on non-empty DATA.
        // Empty DATA frames (CVE-2019-9518 vector) must NOT reset the timer,
        // otherwise an attacker can keep a stream alive indefinitely with
        // zero-length frames while pinning a MAX_CONCURRENT_STREAMS slot.
        if content_len > 0 {
            if let Some(t) = self.stream_last_activity_at.get_mut(&data.stream_id) {
                *t = Instant::now();
            }
        }

        if is_unlinked {
            // Backend is gone but client is still sending DATA.
            // Discard the data (flow control updates were already
            // queued above) to prevent the buffer from filling up.
            kawa.storage.clear();
            if data.end_stream {
                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
                self.mark_end_of_stream(stream);
            }
        } else {
            // Advance storage.head by the full wire payload length so the
            // next frame doesn't read stale pad-length+padding bytes.
            // The slice start is offset by the current head first, so the
            // pushed chunk refers to this frame's payload inside the storage.
            slice.start = slice.start.saturating_add(kawa.storage.head as u32);
            kawa.storage.head += wire_len;

            // Emit chunk framing for chunked transfer encoding (H2→H1 path).
            // H2 converter ignores ChunkHeader and end_chunk Flags, so this is safe for H2→H2.
            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
                // Chunk size line: the payload length in lowercase hex.
                let hex_len = {
                    let mut buf = Vec::with_capacity(16);
                    let _ = write!(buf, "{content_len:x}");
                    buf
                };
                kawa.push_block(kawa::Block::ChunkHeader(kawa::ChunkHeader {
                    length: kawa::Store::from_vec(hex_len),
                }));
            }

            kawa.push_block(kawa::Block::Chunk(kawa::Chunk {
                data: kawa::Store::Slice(slice),
            }));

            // Close the chunk opened above (same condition as the header).
            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
                kawa.push_block(kawa::Block::Flags(kawa::Flags {
                    end_body: false,
                    end_chunk: true,
                    end_header: false,
                    end_stream: false,
                }));
            }

            if data.end_stream {
                // RFC 9113 §8.1.1: on end_stream, total DATA must equal Content-Length.
                // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
                if !cl_exempt {
                    if let Some(expected) = declared_length {
                        if data_received != expected {
                            error!(
                                "{} Content-Length mismatch: received {} != declared {}",
                                log_context!(self),
                                data_received,
                                expected
                            );
                            let result = self.reset_stream(
                                data.stream_id,
                                global_stream_id,
                                context,
                                endpoint,
                                H2Error::ProtocolError,
                            );
                            self.remove_dead_stream(data.stream_id, global_stream_id);
                            return result;
                        }
                    }
                }
                // Terminal flags block: body and stream both end here; a
                // chunked body additionally closes its last chunk.
                let is_chunked = kawa.body_size == kawa::BodySize::Chunked;
                kawa.push_block(kawa::Block::Flags(kawa::Flags {
                    end_body: true,
                    end_chunk: is_chunked,
                    end_header: false,
                    end_stream: true,
                }));
                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
                self.mark_end_of_stream(stream);
            }
            if let StreamState::Linked(token) = stream_state {
                // Mirror of h1.rs:361-368 for the H2-backend → H2-frontend
                // path: edge-triggered epoll will NOT re-fire for bytes we
                // just pushed into stream.back; the synthetic event is the
                // only wake path. LIFECYCLE invariant 15.
                endpoint.readiness_mut(token).arm_writable();
                incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_DATA);
            }
        }
        MuxResult::Continue
    }
5293
5294    fn handle_headers_frame<E, L>(
5295        &mut self,
5296        headers: Headers,
5297        context: &mut Context<L>,
5298        mut endpoint: E,
5299    ) -> MuxResult
5300    where
5301        E: Endpoint,
5302        L: ListenerHandler + L7ListenerHandler,
5303    {
5304        // HEADERS frames represent real application activity (new request
5305        // or response). Reset the timeout since the peer is actively
5306        // communicating, unlike control frames (PING, WINDOW_UPDATE).
5307        self.timeout_container.reset();
5308        if !headers.end_headers {
5309            // CVE-2024-27316: only initialize tracking on the very first HEADERS
5310            // fragment, not on re-entries from ContinuationFrame (which call
5311            // handle_frame(Frame::Headers) with the accumulated header block).
5312            if self.flood_detector.continuation_count == 0 {
5313                self.flood_detector.accumulated_header_size = headers.header_block_fragment.len;
5314            }
5315            debug!(
5316                "{} FRAGMENT: stream_id={}, len={}",
5317                log_context!(self),
5318                headers.stream_id,
5319                self.zero.storage.data().len()
5320            );
5321            self.state = H2State::ContinuationHeader(headers);
5322            return MuxResult::Continue;
5323        }
5324        // Header block is complete — reset CONTINUATION counters
5325        self.flood_detector.reset_continuation();
5326        // can this fail?
5327        let stream_id = headers.stream_id;
5328        let Some(global_stream_id) = self.streams.get(&stream_id).copied() else {
5329            error!(
5330                "{} Handling Headers frame with no attached stream {:#?}",
5331                log_context!(self),
5332                self
5333            );
5334            incr!(names::h2::HEADERS_NO_STREAM_ERROR);
5335            self.attribute_bytes_to_overhead();
5336            return self.force_disconnect();
5337        };
5338
5339        // Refresh per-stream idle timer on HEADERS (response headers or trailers
5340        // on an existing stream). Initial HEADERS that create the stream already
5341        // set the timestamp in create_stream().
5342        if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
5343            *t = Instant::now();
5344        }
5345
5346        if let Some(priority) = &headers.priority {
5347            if self.prioriser.push_priority(stream_id, priority.clone()) {
5348                self.reset_stream(
5349                    stream_id,
5350                    global_stream_id,
5351                    context,
5352                    endpoint,
5353                    H2Error::ProtocolError,
5354                );
5355                self.remove_dead_stream(stream_id, global_stream_id);
5356                return MuxResult::Continue;
5357            }
5358        }
5359
5360        let stream = &mut context.streams[global_stream_id];
5361        self.attribute_bytes_to_stream(&mut stream.metrics);
5362        let kawa = &mut self.zero;
5363        let buffer = headers.header_block_fragment.data(kawa.storage.buffer());
5364        let stream = &mut context.streams[global_stream_id];
5365        let parts = &mut stream.split(&self.position);
5366        let was_initial = parts.rbuffer.is_initial();
5367        let elide_x_real_ip = parts.context.elide_x_real_ip;
5368        let status = pkawa::handle_header(
5369            &mut self.decoder,
5370            &mut self.prioriser,
5371            stream_id,
5372            parts.rbuffer,
5373            buffer,
5374            headers.end_stream,
5375            parts.context,
5376            self.flood_detector.config.max_header_list_size,
5377            self.flood_detector.config.max_header_fields,
5378            elide_x_real_ip,
5379        );
5380        kawa.storage.clear();
5381        if let Err((error, global)) = status {
5382            match self.position {
5383                Position::Client(..) => incr!(names::http::BACKEND_PARSE_ERRORS),
5384                Position::Server => incr!(names::http::FRONTEND_PARSE_ERRORS),
5385            }
5386            if global {
5387                error!(
5388                    "{} GOT GLOBAL ERROR WHILE PROCESSING HEADERS",
5389                    log_context!(self)
5390                );
5391                return self.goaway(error);
5392            } else {
5393                let result =
5394                    self.reset_stream(stream_id, global_stream_id, context, endpoint, error);
5395                self.remove_dead_stream(stream_id, global_stream_id);
5396                return result;
5397            }
5398        }
5399        if headers.end_stream {
5400            // RFC 9113 §8.1.1: when END_STREAM arrives via trailers,
5401            // validate that total DATA received matches Content-Length.
5402            // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
5403            if !was_initial && !self.content_length_exempt(&stream.context) {
5404                let parts = stream.split(&self.position);
5405                if let kawa::BodySize::Length(expected) = parts.rbuffer.body_size {
5406                    if *parts.data_received != expected {
5407                        error!(
5408                            "{} Content-Length mismatch on trailers: received {} != declared {}",
5409                            log_context!(self),
5410                            *parts.data_received,
5411                            expected
5412                        );
5413                        let result = self.reset_stream(
5414                            stream_id,
5415                            global_stream_id,
5416                            context,
5417                            endpoint,
5418                            H2Error::ProtocolError,
5419                        );
5420                        self.remove_dead_stream(stream_id, global_stream_id);
5421                        return result;
5422                    }
5423                }
5424            }
5425            self.mark_end_of_stream(stream);
5426        }
5427        if let StreamState::Linked(token) = stream.state {
5428            // Mirror of handle_data_frame's rearm. LIFECYCLE invariant 15.
5429            endpoint.readiness_mut(token).arm_writable();
5430            incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_HEADERS);
5431        }
5432        // was_initial prevents trailers from triggering connection
5433        if was_initial && self.position.is_server() {
5434            incr!(names::http::REQUESTS);
5435            gauge_add!(names::http::ACTIVE_REQUESTS, 1);
5436            stream.metrics.service_start();
5437            stream.request_counted = true;
5438            stream.state = StreamState::Link;
5439            context.pending_links.push_back(global_stream_id);
5440        }
5441        MuxResult::Continue
5442    }
5443
5444    fn handle_push_promise_frame(&mut self) -> MuxResult {
5445        self.attribute_bytes_to_overhead();
5446        match self.position {
5447            Position::Client(..) => {
5448                // RFC 9113 §8.4: Server push is deprecated. Sozu never sends
5449                // SETTINGS_ENABLE_PUSH=1, so receiving PUSH_PROMISE is a protocol error.
5450                error!(
5451                    "{} Received PUSH_PROMISE but server push is not supported",
5452                    log_context!(self)
5453                );
5454                self.goaway(H2Error::ProtocolError)
5455            }
5456            Position::Server => {
5457                // Clients must never send PUSH_PROMISE (RFC 9113 §8.4)
5458                error!("{} Received PUSH_PROMISE from client", log_context!(self));
5459                self.goaway(H2Error::ProtocolError)
5460            }
5461        }
5462    }
5463
5464    fn handle_priority_frame<E, L>(
5465        &mut self,
5466        priority: parser::Priority,
5467        context: &mut Context<L>,
5468        endpoint: E,
5469    ) -> MuxResult
5470    where
5471        E: Endpoint,
5472        L: ListenerHandler + L7ListenerHandler,
5473    {
5474        if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
5475            let stream = &mut context.streams[global_stream_id];
5476            self.attribute_bytes_to_stream(&mut stream.metrics);
5477        } else {
5478            self.attribute_bytes_to_overhead();
5479        }
5480        // Pass 3 Medium #4: standalone PRIORITY frames can arrive for any
5481        // peer-chosen stream ID. Accept only currently-open streams and a
5482        // small idle look-ahead window; everything else is dropped before
5483        // it can feed memory into the priority map.
5484        if self.prioriser.push_priority_guarded(
5485            priority.stream_id,
5486            priority.inner,
5487            self.last_stream_id,
5488            &self.streams,
5489        ) {
5490            if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
5491                let result = self.reset_stream(
5492                    priority.stream_id,
5493                    global_stream_id,
5494                    context,
5495                    endpoint,
5496                    H2Error::ProtocolError,
5497                );
5498                self.remove_dead_stream(priority.stream_id, global_stream_id);
5499                return result;
5500            } else {
5501                error!(
5502                    "{} INVALID PRIORITY RECEIVED ON INVALID STREAM",
5503                    log_context!(self)
5504                );
5505                return self.goaway(H2Error::ProtocolError);
5506            }
5507        }
5508        MuxResult::Continue
5509    }
5510
5511    /// RFC 9218 §7.1: PRIORITY_UPDATE reprioritizes an open or idle-soon
5512    /// stream at the connection level. Decodes the priority field value
5513    /// (same grammar as the `priority` request header, `parse_rfc9218_priority`)
5514    /// and pushes it into the `Prioriser` through the same guarded path used
5515    /// for standalone PRIORITY frames — the guard bounds memory against a
5516    /// client spamming PRIORITY_UPDATE for far-future stream IDs.
5517    ///
5518    /// Prioritized stream ID `0` is a connection-level `PROTOCOL_ERROR`
5519    /// (RFC 9218 §7.1). For any other ID that is not currently open or
5520    /// within the idle look-ahead budget, the update is silently dropped
5521    /// (matches the PRIORITY-frame guard semantics — no state change).
5522    fn handle_priority_update_frame(&mut self, pu: parser::PriorityUpdate) -> MuxResult {
5523        self.attribute_bytes_to_overhead();
5524        if pu.prioritized_stream_id == 0 {
5525            error!(
5526                "{} PRIORITY_UPDATE with prioritized_stream_id=0 (RFC 9218 §7.1)",
5527                log_context!(self)
5528            );
5529            return self.goaway(H2Error::ProtocolError);
5530        }
5531        let (urgency, incremental) = pkawa::parse_rfc9218_priority(&pu.priority_field_value);
5532        let (prev_urgency, _) = self.prioriser.get(&pu.prioritized_stream_id);
5533        trace!(
5534            "{} PRIORITY_UPDATE stream={} urgency={}->{} incremental={} rearmed_writable=true",
5535            log_context!(self),
5536            pu.prioritized_stream_id,
5537            prev_urgency,
5538            urgency,
5539            incremental
5540        );
5541        let _ = self.prioriser.push_priority_guarded(
5542            pu.prioritized_stream_id,
5543            parser::PriorityPart::Rfc9218 {
5544                urgency,
5545                incremental,
5546            },
5547            self.last_stream_id,
5548            &self.streams,
5549        );
5550        // LIFECYCLE invariant 15: reprioritisation only changes ordering for
5551        // the NEXT write pass. Under ET epoll, if finalize_write already
5552        // stripped WRITABLE, the scheduler won't re-run without a synthetic
5553        // wake — pair the interest insert with signal_pending_write.
5554        self.readiness.arm_writable();
5555        incr!(names::h2::SIGNAL_WRITABLE_REARMED_PRIORITY_UPDATE);
5556        MuxResult::Continue
5557    }
5558
5559    fn handle_rst_stream_frame<E, L>(
5560        &mut self,
5561        rst_stream: parser::RstStream,
5562        context: &mut Context<L>,
5563        mut endpoint: E,
5564    ) -> MuxResult
5565    where
5566        E: Endpoint,
5567        L: ListenerHandler + L7ListenerHandler,
5568    {
5569        // Per-error-code counter for the inbound RST. Emitted before the
5570        // flood-detector trip check so even a connection that gets terminated
5571        // by `handle_flood_violation` shows up in the per-code breakdown
5572        // (the dedicated `h2.flood.violation.rst_stream_*` series tracks the
5573        // mitigation event itself).
5574        count!(metric_for_rst_stream_received(rst_stream.error_code), 1);
5575        // CVE-2023-44487 Rapid Reset + CVE-2019-9514: track RST_STREAM rate.
5576        let rst_count_before = self.flood_detector.rst_stream_count;
5577        self.flood_detector.rst_stream_count += 1;
5578        debug_assert_eq!(
5579            self.flood_detector.rst_stream_count,
5580            rst_count_before + 1,
5581            "per-window RST_STREAM counter must advance by exactly one per inbound RST"
5582        );
5583        check_flood_or_return!(self);
5584        // Additional CVE-2023-44487 mitigation: lifetime cap on RST_STREAM
5585        // frames received. The per-window counter above half-decays, so a
5586        // patient client can keep ~50 RST/s forever; a never-decaying
5587        // lifetime counter puts an absolute ceiling on that amplification.
5588        // Streams whose backend response has not yet started count toward a
5589        // much lower "abusive" ceiling — this is the signature Rapid Reset
5590        // pattern where the attacker pays one RST frame and we pay a
5591        // backend round-trip for each.
5592        //
5593        // "Response started" here means the Server has begun producing
5594        // response bytes (backend kawa buffer past its initial phase). For
5595        // the Client position the concept does not apply symmetrically
5596        // (RSTs received from the backend are rare and benign), so we
5597        // conservatively flag them as abusive too — lifetime cap still
5598        // dominates in practice.
5599        let response_started = match self.streams.get(&rst_stream.stream_id) {
5600            Some(global_stream_id) => {
5601                let stream = &context.streams[*global_stream_id];
5602                !stream.back.is_initial()
5603            }
5604            // Stream already gone (e.g. closed, not yet registered) —
5605            // treat as response-started to avoid over-counting benign
5606            // races as abusive.
5607            None => true,
5608        };
5609        if let Some(violation) = self.flood_detector.record_rst_lifetime(response_started) {
5610            return self.handle_flood_violation(violation);
5611        }
5612        // Rapid Reset signature (CVE-2023-44487): a RST that arrives before the
5613        // backend has begun answering. Emitted alongside the per-code counter
5614        // so the SOC can alert on the rate of pre-response RSTs without
5615        // having to differentiate by error code.
5616        if !response_started {
5617            count!(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START, 1);
5618        }
5619        debug!(
5620            "{} RstStream({} -> {})",
5621            log_context!(self),
5622            rst_stream.error_code,
5623            H2Error::try_from(rst_stream.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str())
5624        );
5625        // Compute totals before removing the stream from the map,
5626        // so the removed stream's bytes are included in the total.
5627        let rst_byte_totals = self.compute_stream_byte_totals(context);
5628        if let Some(global_stream_id) = self.streams.get(&rst_stream.stream_id).copied() {
5629            let stream = &mut context.streams[global_stream_id];
5630            self.attribute_bytes_to_stream(&mut stream.metrics);
5631            let linked_token = stream.linked_token();
5632            let (client_rtt, server_rtt) =
5633                Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
5634            if let Some(token) = linked_token {
5635                endpoint.end_stream(token, global_stream_id, context);
5636            }
5637            let stream = &mut context.streams[global_stream_id];
5638            match &self.position {
5639                // Inbound RST_STREAM on the backend side terminates the in-flight
5640                // request without going through Connection::end_stream (the normal
5641                // place where Backend.active_requests is decremented), so do the
5642                // bookkeeping explicitly here to avoid leaking load counters.
5643                Position::Client(_, backend, BackendStatus::Connected) => {
5644                    let mut backend_borrow = backend.borrow_mut();
5645                    backend_borrow.active_requests =
5646                        backend_borrow.active_requests.saturating_sub(1);
5647                }
5648                Position::Client(..) => {}
5649                Position::Server => {
5650                    self.distribute_overhead(&mut stream.metrics, rst_byte_totals);
5651                    // This is a special case, normally, all stream are terminated by the server
5652                    // when the last byte of the response is written. Here, the reset is requested
5653                    // on the server endpoint and immediately terminates, shortcutting the other path
5654                    stream.metrics.backend_stop();
5655                    stream.generate_access_log(
5656                        true,
5657                        Some("H2::ResetFrame"),
5658                        context.listener.clone(),
5659                        client_rtt,
5660                        server_rtt,
5661                    );
5662                    stream.state = StreamState::Recycle;
5663                }
5664            }
5665            // Retire from streams/prioriser/stream_last_activity_at and
5666            // invalidate expect_write/expect_read if they reference this gid.
5667            self.remove_dead_stream(rst_stream.stream_id, global_stream_id);
5668        } else {
5669            self.attribute_bytes_to_overhead();
5670        }
5671        MuxResult::Continue
5672    }
5673
5674    fn handle_settings_frame<L>(
5675        &mut self,
5676        settings: parser::Settings,
5677        context: &mut Context<L>,
5678    ) -> MuxResult
5679    where
5680        L: ListenerHandler + L7ListenerHandler,
5681    {
5682        if settings.ack {
5683            // RFC 9113 §6.5: SETTINGS ACK must have empty payload
5684            if !settings.settings.is_empty() {
5685                error!("{} SETTINGS ACK with non-empty payload", log_context!(self));
5686                return self.goaway(H2Error::FrameSizeError);
5687            }
5688            // RFC 9113 §6.5: peer acknowledged our SETTINGS — clear timeout
5689            self.settings_sent_at = None;
5690            // RFC 7541 §4.2: sync the decoder's max allowed table size with
5691            // what we advertised. Currently a no-op (settings don't change at
5692            // runtime), but guards against future runtime SETTINGS updates.
5693            self.decoder.set_max_allowed_table_size(
5694                self.local_settings.settings_header_table_size as usize,
5695            );
5696            self.attribute_bytes_to_overhead();
5697            return MuxResult::Continue;
5698        }
5699        // CVE-2019-9515: track SETTINGS frame rate
5700        let settings_count_before = self.flood_detector.settings_count;
5701        let settings_lifetime_before = self.flood_detector.total_settings_received_lifetime;
5702        self.flood_detector.settings_count += 1;
5703        self.flood_detector.total_settings_received_lifetime = self
5704            .flood_detector
5705            .total_settings_received_lifetime
5706            .saturating_add(1);
5707        debug_assert_eq!(
5708            self.flood_detector.settings_count,
5709            settings_count_before + 1,
5710            "per-window SETTINGS counter must advance by one per non-ACK SETTINGS"
5711        );
5712        debug_assert!(
5713            self.flood_detector.total_settings_received_lifetime > settings_lifetime_before
5714                || settings_lifetime_before == u32::MAX,
5715            "lifetime SETTINGS counter must advance (or already be saturated)"
5716        );
5717        check_flood_or_return!(self);
5718        for setting in settings.settings {
5719            let v = setting.value;
5720            let mut is_error = false;
5721            #[rustfmt::skip]
5722            match setting.identifier {
5723                parser::SETTINGS_HEADER_TABLE_SIZE => {
5724                    // Cap to the configured maximum — a malicious peer can
5725                    // advertise up to 4 GB to inflate HPACK encoder memory.
5726                    let cap = self.flood_detector.config.max_header_table_size;
5727                    let capped = v.min(cap);
5728                    self.peer_settings.settings_header_table_size = capped;
5729                    self.encoder.set_max_table_size(capped as usize);
5730                    // RFC 7541 §4.2 / §6.3: queue a dynamic-table-size-update
5731                    // HPACK directive for the next header block we emit.
5732                    // Without it, the peer's decoder keeps its previous (possibly
5733                    // larger) table cap and our encoder-side change is silent
5734                    // — conformance suites (h2spec `hpack/4.2`) will flag it.
5735                    self.pending_table_size_update = Some(capped);
5736                },
5737                parser::SETTINGS_ENABLE_PUSH       => { self.peer_settings.settings_enable_push = v == 1;             is_error |= v > 1 },
5738                parser::SETTINGS_MAX_CONCURRENT_STREAMS => { self.peer_settings.settings_max_concurrent_streams = v },
5739                parser::SETTINGS_INITIAL_WINDOW_SIZE    => { is_error |= self.update_initial_window_size(v, context) },
5740                parser::SETTINGS_MAX_FRAME_SIZE         => { self.peer_settings.settings_max_frame_size = v;           is_error |= !(MIN_MAX_FRAME_SIZE..MAX_MAX_FRAME_SIZE).contains(&v) },
5741                parser::SETTINGS_MAX_HEADER_LIST_SIZE   => { self.peer_settings.settings_max_header_list_size = v },
5742                parser::SETTINGS_ENABLE_CONNECT_PROTOCOL => { self.peer_settings.settings_enable_connect_protocol = v == 1; is_error |= v > 1 },
5743                parser::SETTINGS_NO_RFC7540_PRIORITIES   => { self.peer_settings.settings_no_rfc7540_priorities = v == 1;   is_error |= v > 1 },
5744                other => { warn!("Unknown setting_id: {}, we MUST ignore this", other); self.flood_detector.glitch_count += 1 },
5745            };
5746            if is_error {
5747                error!("{} INVALID SETTING", log_context!(self));
5748                return self.goaway(H2Error::ProtocolError);
5749            }
5750        }
5751
5752        self.attribute_bytes_to_overhead();
5753
5754        // Enlarge the connection-level receive window for backend H2
5755        // connections (Position::Client). The server side does this in
5756        // the ServerSettings writable path, but the client needs to do
5757        // it here after receiving the server's initial SETTINGS.
5758        if self.position.is_client()
5759            && self.flow_control.window <= DEFAULT_INITIAL_WINDOW_SIZE as i32
5760        {
5761            let increment = self
5762                .connection_config
5763                .initial_connection_window
5764                .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
5765            if increment > 0 {
5766                self.queue_window_update(0, increment);
5767            }
5768            // Do NOT increment flow_control.window here: sending our own
5769            // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
5770            // Our send window is only updated by WINDOW_UPDATEs we receive
5771            // from the peer (RFC 9113 §6.9).
5772        }
5773
5774        let kawa = &mut self.zero;
5775        let ack = &serializer::SETTINGS_ACKNOWLEDGEMENT;
5776        let buf = kawa.storage.space();
5777        if buf.len() < ack.len() {
5778            error!(
5779                "{} No space in zero buffer for SETTINGS ACK ({} available, {} needed)",
5780                log_context!(self),
5781                buf.len(),
5782                ack.len()
5783            );
5784            return self.force_disconnect();
5785        }
5786        buf[..ack.len()].copy_from_slice(ack);
5787        kawa.storage.fill(ack.len());
5788
5789        self.readiness.interest.insert(Ready::WRITABLE);
5790        self.readiness.interest.remove(Ready::READABLE);
5791        self.expect_write = Some(H2StreamId::Zero);
5792        self.readiness.signal_pending_write();
5793        MuxResult::Continue
5794    }
5795
5796    fn handle_ping_frame(&mut self, ping: parser::Ping) -> MuxResult {
5797        if ping.ack {
5798            self.attribute_bytes_to_overhead();
5799            return MuxResult::Continue;
5800        }
5801        // CVE-2019-9512: track non-ACK PING frame rate
5802        let ping_count_before = self.flood_detector.ping_count;
5803        let ping_lifetime_before = self.flood_detector.total_ping_received_lifetime;
5804        self.flood_detector.ping_count += 1;
5805        self.flood_detector.total_ping_received_lifetime = self
5806            .flood_detector
5807            .total_ping_received_lifetime
5808            .saturating_add(1);
5809        debug_assert_eq!(
5810            self.flood_detector.ping_count,
5811            ping_count_before + 1,
5812            "per-window PING counter must advance by one per non-ACK PING"
5813        );
5814        debug_assert!(
5815            self.flood_detector.total_ping_received_lifetime > ping_lifetime_before
5816                || ping_lifetime_before == u32::MAX,
5817            "lifetime PING counter must advance (or already be saturated)"
5818        );
5819        check_flood_or_return!(self);
5820        self.attribute_bytes_to_overhead();
5821        let kawa = &mut self.zero;
5822        let ping_response_size = serializer::PING_ACKNOWLEDGEMENT_HEADER.len() + 8;
5823        if kawa.storage.space().len() < ping_response_size {
5824            error!(
5825                "{} No space in zero buffer for PING response ({} available, {} needed)",
5826                log_context!(self),
5827                kawa.storage.space().len(),
5828                ping_response_size
5829            );
5830            return self.force_disconnect();
5831        }
5832        match serializer::gen_ping_acknowledgement(kawa.storage.space(), &ping.payload) {
5833            Ok((_, size)) => {
5834                kawa.storage.fill(size);
5835                incr!(names::h2::FRAMES_TX_PING_ACK);
5836            }
5837            Err(error) => {
5838                error!(
5839                    "{} Could not serialize PingFrame: {:?}",
5840                    log_context!(self),
5841                    error
5842                );
5843                return self.force_disconnect();
5844            }
5845        };
5846        self.readiness.interest.insert(Ready::WRITABLE);
5847        self.readiness.interest.remove(Ready::READABLE);
5848        self.expect_write = Some(H2StreamId::Zero);
5849        self.readiness.signal_pending_write();
5850        MuxResult::Continue
5851    }
5852
5853    fn handle_goaway_frame<E, L>(
5854        &mut self,
5855        goaway: parser::GoAway,
5856        context: &mut Context<L>,
5857        mut endpoint: E,
5858    ) -> MuxResult
5859    where
5860        E: Endpoint,
5861        L: ListenerHandler + L7ListenerHandler,
5862    {
5863        self.attribute_bytes_to_overhead();
5864        let error_name =
5865            H2Error::try_from(goaway.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str());
5866        if goaway.error_code == H2Error::NoError as u32 {
5867            debug!(
5868                "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5869                log_context!(self),
5870                goaway.last_stream_id,
5871                error_name,
5872                goaway.additional_debug_data
5873            );
5874        } else {
5875            // Peer-originated failure: no variant of H2Error from a peer
5876            // implies a sozu bug. Impact handling is separate (retry above
5877            // `last_stream_id`, RST_STREAM for consumed streams) and logs
5878            // its own details below, so the summary drops to `warn!`.
5879            warn!(
5880                "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5881                log_context!(self),
5882                goaway.last_stream_id,
5883                error_name,
5884                goaway.additional_debug_data
5885            );
5886        }
5887        count!(metric_for_goaway_received(goaway.error_code), 1);
5888        // RFC 9113 §6.8: begin graceful drain.
5889        self.drain.draining = true;
5890        self.drain.peer_last_stream_id = Some(goaway.last_stream_id);
5891
5892        // Streams with ID > last_stream_id were NOT processed by the peer.
5893        // Mark them for retry (StreamState::Link) so they can be retried
5894        // on a new connection.
5895        // IMPORTANT: do NOT call endpoint.end_stream() here — that would
5896        // remove the stream from the frontend's H2 stream map and send
5897        // RST_STREAM to the client, killing the request instead of retrying it.
5898        let mut retry_streams = Vec::new();
5899        for (&stream_id, &global_stream_id) in &self.streams {
5900            if stream_id > goaway.last_stream_id {
5901                retry_streams.push((stream_id, global_stream_id));
5902            }
5903        }
5904        for (stream_id, global_stream_id) in &retry_streams {
5905            // Remove from reverse index before transitioning away from Linked.
5906            if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5907                remove_backend_stream(&mut context.backend_streams, token, *global_stream_id);
5908            }
5909            let stream = &mut context.streams[*global_stream_id];
5910            if stream.front.consumed {
5911                // Request was already sent to this backend — we can't
5912                // replay it. Use the linked token's readiness (via endpoint)
5913                // so the RST_STREAM reaches the client.
5914                debug!(
5915                    "{} GOAWAY: stream {} already consumed, cannot retry",
5916                    log_context!(self),
5917                    stream_id
5918                );
5919                if let StreamState::Linked(token) = stream.state {
5920                    let front_readiness = endpoint.readiness_mut(token);
5921                    forcefully_terminate_answer(stream, front_readiness, H2Error::RefusedStream);
5922                } else {
5923                    warn!(
5924                        "{} GOAWAY: stream {} consumed but not Linked, cannot notify frontend",
5925                        log_context!(self),
5926                        stream_id
5927                    );
5928                }
5929            } else {
5930                stream.state = StreamState::Link;
5931                context.pending_links.push_back(*global_stream_id);
5932            }
5933            // Both retry (!consumed) and terminated (consumed) paths remove the
5934            // stream from self.streams without going through Connection::end_stream,
5935            // so decrement Backend.active_requests here to keep load metrics honest.
5936            if let Position::Client(_, backend, BackendStatus::Connected) = &self.position {
5937                let mut backend_borrow = backend.borrow_mut();
5938                backend_borrow.active_requests = backend_borrow.active_requests.saturating_sub(1);
5939            }
5940            // Retire from streams/prioriser/stream_last_activity_at and
5941            // invalidate expect_write/expect_read if they reference this gid.
5942            self.remove_dead_stream(*stream_id, *global_stream_id);
5943        }
5944
5945        // If no active streams remain, close immediately
5946        if self.streams.is_empty() {
5947            return self.goaway(H2Error::NoError);
5948        }
5949
5950        // Otherwise, let remaining streams (ID <= last_stream_id) complete.
5951        // The connection will be closed when all streams finish.
5952        MuxResult::Continue
5953    }
5954
5955    fn handle_window_update_frame<E, L>(
5956        &mut self,
5957        wu: WindowUpdate,
5958        context: &mut Context<L>,
5959        endpoint: E,
5960    ) -> MuxResult
5961    where
5962        E: Endpoint,
5963        L: ListenerHandler + L7ListenerHandler,
5964    {
5965        let stream_id = wu.stream_id;
5966        let increment = wu.increment;
5967
5968        // RFC 9113 §6.9: increment of 0 MUST be treated as an error.
5969        // Connection-level (stream 0) -> connection error (GOAWAY).
5970        // Stream-level -> stream error (RST_STREAM).
5971        if increment == 0 {
5972            if stream_id == 0 {
5973                error!(
5974                    "{} WINDOW_UPDATE with zero increment on connection (stream 0)",
5975                    log_context!(self)
5976                );
5977                return self.goaway(H2Error::ProtocolError);
5978            } else {
5979                error!(
5980                    "{} WINDOW_UPDATE with zero increment on stream {}",
5981                    log_context!(self),
5982                    stream_id
5983                );
5984                if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5985                    let result = self.reset_stream(
5986                        stream_id,
5987                        global_stream_id,
5988                        context,
5989                        endpoint,
5990                        H2Error::ProtocolError,
5991                    );
5992                    self.remove_dead_stream(stream_id, global_stream_id);
5993                    return result;
5994                }
5995                // Stream not in map (already closed) — treat as glitch
5996                self.flood_detector.glitch_count += 1;
5997                check_flood_or_return!(self);
5998                self.attribute_bytes_to_overhead();
5999                return MuxResult::Continue;
6000            }
6001        }
6002
6003        // The parser masks the reserved bit (STREAM_ID_MASK), so increment <=
6004        // 2^31-1 and try_from always succeeds. Use try_from rather than `as` to
6005        // guard against a future parser change that drops the mask.
6006        let increment = i32::try_from(increment).unwrap_or(i32::MAX);
6007        // RFC 9113 §6.9: a non-zero WINDOW_UPDATE increment is in [1, 2^31-1].
6008        // Zero was short-circuited above; this asserts the masked value is a
6009        // legal positive increment before we add it to a window.
6010        debug_assert!(
6011            increment > 0,
6012            "WINDOW_UPDATE increment must be strictly positive at this point (zero handled above)"
6013        );
6014        if stream_id == 0 {
6015            // Count connection-level WINDOW_UPDATEs before touching the window
6016            // so a per-window flood stops us before we pay the arithmetic cost
6017            // on a million-frame burst. Zero-increment frames short-circuited
6018            // above, so every increment here is a legal-looking rate consumer.
6019            let wu0_before = self.flood_detector.window_update_stream0_count;
6020            self.flood_detector.window_update_stream0_count = self
6021                .flood_detector
6022                .window_update_stream0_count
6023                .saturating_add(1);
6024            debug_assert!(
6025                self.flood_detector.window_update_stream0_count > wu0_before
6026                    || wu0_before == u32::MAX,
6027                "stream-0 WINDOW_UPDATE flood counter must advance before the flood check"
6028            );
6029            check_flood_or_return!(self);
6030            self.attribute_bytes_to_overhead();
6031            let window_before = self.flow_control.window;
6032            if let Some(window) = self.flow_control.window.checked_add(increment) {
6033                if self.flow_control.window <= 0 && window > 0 {
6034                    self.readiness.arm_writable();
6035                }
6036                self.flow_control.window = window;
6037                // Flow-control replenish invariant (RFC 9113 §6.9): the
6038                // connection send window grows by exactly `increment` and stays
6039                // within i32 (the `checked_add` already rejected overflow, which
6040                // is a FLOW_CONTROL_ERROR on the wire). The window may legally
6041                // be negative (a SETTINGS change can shrink it below zero) but
6042                // a WINDOW_UPDATE only ever increases it.
6043                debug_assert_eq!(
6044                    self.flow_control.window,
6045                    window_before + increment,
6046                    "connection window must increase by exactly the increment"
6047                );
6048                debug_assert!(
6049                    self.flow_control.window > window_before,
6050                    "a positive WINDOW_UPDATE must strictly grow the connection window"
6051                );
6052                debug!(
6053                    "{} WINDOW_UPDATE received: stream=0 increment={} new_connection_window={}",
6054                    log_context!(self),
6055                    increment,
6056                    self.flow_control.window
6057                );
6058            } else {
6059                error!("{} INVALID WINDOW INCREMENT", log_context!(self));
6060                return self.goaway(H2Error::FlowControlError);
6061            }
6062        } else if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
6063            let stream = &mut context.streams[global_stream_id];
6064            self.attribute_bytes_to_stream(&mut stream.metrics);
6065            let stream_window_before = stream.window;
6066            if let Some(window) = stream.window.checked_add(increment) {
6067                if stream.window <= 0 && window > 0 {
6068                    self.readiness.arm_writable();
6069                }
6070                stream.window = window;
6071                // Same replenish invariant as the connection window, applied to
6072                // the per-stream send window (RFC 9113 §6.9.1). Overflow past
6073                // 2^31-1 is rejected by `checked_add` and handled as a
6074                // FLOW_CONTROL_ERROR RST_STREAM below.
6075                debug_assert_eq!(
6076                    stream.window,
6077                    stream_window_before + increment,
6078                    "stream window must increase by exactly the increment"
6079                );
6080                debug_assert!(
6081                    stream.window > stream_window_before,
6082                    "a positive WINDOW_UPDATE must strictly grow the stream window"
6083                );
6084                debug!(
6085                    "{} WINDOW_UPDATE received: stream={} increment={} new_stream_window={}",
6086                    log_context!(self),
6087                    stream_id,
6088                    increment,
6089                    stream.window
6090                );
6091            } else {
6092                let result = self.reset_stream(
6093                    stream_id,
6094                    global_stream_id,
6095                    context,
6096                    endpoint,
6097                    H2Error::FlowControlError,
6098                );
6099                self.remove_dead_stream(stream_id, global_stream_id);
6100                return result;
6101            }
6102        } else {
6103            self.attribute_bytes_to_overhead();
6104            trace!(
6105                "{} Ignoring window update on closed stream {}: {}",
6106                log_context!(self),
6107                stream_id,
6108                increment
6109            );
6110            // Pass 3 Low #5: WINDOW_UPDATE on a closed stream is legal
6111            // (RFC 9113 §6.9.1) but has no useful effect, so a peer that
6112            // keeps sending them is wasting our cycles. Count it as a
6113            // glitch so a flood contributes to `check_flood()` and can
6114            // eventually trigger ENHANCE_YOUR_CALM.
6115            self.flood_detector.glitch_count += 1;
6116            check_flood_or_return!(self);
6117        }
6118        MuxResult::Continue
6119    }
6120
6121    fn update_initial_window_size<L>(&mut self, value: u32, context: &mut Context<L>) -> bool
6122    where
6123        L: ListenerHandler + L7ListenerHandler,
6124    {
6125        if value > FLOW_CONTROL_MAX_WINDOW {
6126            return true;
6127        }
6128        let delta = match i32::try_from(
6129            value as i64 - self.peer_settings.settings_initial_window_size as i64,
6130        ) {
6131            Ok(d) => d,
6132            Err(_) => {
6133                error!("{} initial window size delta overflow", log_context!(self));
6134                return true;
6135            }
6136        };
6137        let mut open_window = false;
6138        // Only update windows for streams owned by this connection
6139        for &global_stream_id in self.streams.values() {
6140            let stream = &mut context.streams[global_stream_id];
6141            // RFC 9113 §6.9.2: changes to SETTINGS_INITIAL_WINDOW_SIZE can cause
6142            // stream windows to exceed 2^31-1, which is a flow control error.
6143            match stream.window.checked_add(delta) {
6144                Some(new_window) => {
6145                    open_window |= stream.window <= 0 && new_window > 0;
6146                    stream.window = new_window;
6147                }
6148                None => return true,
6149            }
6150        }
6151        trace!(
6152            "{} UPDATE INIT WINDOW: {} {} {:?}",
6153            log_context!(self),
6154            delta,
6155            open_window,
6156            self.readiness
6157        );
6158        if open_window {
6159            self.readiness.arm_writable();
6160        }
6161        self.peer_settings.settings_initial_window_size = value;
6162        false
6163    }
6164
6165    pub fn force_disconnect(&mut self) -> MuxResult {
6166        self.state = H2State::Error;
6167        match &mut self.position {
6168            Position::Client(_, _, status) => {
6169                *status = BackendStatus::Disconnecting;
6170                self.readiness.event = Ready::HUP;
6171                debug!(
6172                    "{} H2 force_disconnect client: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
6173                    log_context!(self),
6174                    self.state,
6175                    self.streams.len(),
6176                    self.expect_write,
6177                    self.socket.socket_wants_write(),
6178                    self.readiness
6179                );
6180                MuxResult::Continue
6181            }
6182            Position::Server => {
6183                if self.peer_gone_after_final_goaway() {
6184                    return MuxResult::CloseSession;
6185                }
6186                // Don't disconnect immediately if rustls still has buffered TLS
6187                // records. Returning CloseSession here triggers shutdown(Write)
6188                // which sends FIN — but any TLS records still in rustls's buffer
6189                // (not yet flushed to the TCP send buffer) are lost, causing the
6190                // client to see "TLS decode error / unexpected eof".
6191                // Instead, keep WRITABLE interest and let the writable path flush.
6192                if self.socket.socket_wants_write() {
6193                    debug!(
6194                        "{} H2 force_disconnect delaying close: state={:?}, streams={}, expect_write={:?}, wants_write=true, readiness={:?}",
6195                        log_context!(self),
6196                        self.state,
6197                        self.streams.len(),
6198                        self.expect_write,
6199                        self.readiness
6200                    );
6201                    self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
6202                    self.ensure_tls_flushed();
6203                    MuxResult::Continue
6204                } else {
6205                    debug!(
6206                        "{} H2 force_disconnect closing session: state={:?}, streams={}, expect_write={:?}, wants_write=false, readiness={:?}",
6207                        log_context!(self),
6208                        self.state,
6209                        self.streams.len(),
6210                        self.expect_write,
6211                        self.readiness
6212                    );
6213                    MuxResult::CloseSession
6214                }
6215            }
6216        }
6217    }
6218
    /// Tears down this H2 connection; the work done depends on `self.position`.
    ///
    /// * `Position::Client` carrying `BackendStatus::KeepAlive`: logs an error
    ///   and returns without touching any stream.
    /// * Any other `Position::Client`: calls `endpoint.end_stream` for every
    ///   stream registered in `self.streams` whose state is
    ///   `StreamState::Linked`.
    /// * `Position::Server`: logs the connection (and per-stream) state when
    ///   anything still looks active, runs `drain_tls_close_notify` on the
    ///   socket, reports any output the socket still wants to write
    ///   afterwards, and returns. `endpoint` is not used on this path.
    pub fn close<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E)
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        match self.position {
            Position::Client(_, _, BackendStatus::KeepAlive) => {
                error!(
                    "{} H2 connections do not use KeepAlive backend status",
                    log_context!(self)
                );
                return;
            }
            // Other client-side statuses fall through to the per-stream
            // `end_stream` loop at the bottom of the function.
            Position::Client(..) => {}
            Position::Server => {
                // Snapshot the socket's pending-write flag before the drain
                // below so the diagnostics can show both "before" and "after".
                let tls_pending_before = self.socket.socket_wants_write();
                if !self.streams.is_empty() || tls_pending_before || self.expect_write.is_some() {
                    debug!(
                        "{} H2 close with active state: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
                        log_context!(self),
                        self.state,
                        self.streams.len(),
                        self.expect_write,
                        tls_pending_before,
                        self.readiness
                    );
                    // One debug line per stream still registered on this
                    // connection, to make the state at close time inspectable.
                    for (stream_id, global_stream_id) in &self.streams {
                        let stream = &context.streams[*global_stream_id];
                        debug!(
                            "{}   close stream id={} gid={}: state={:?}, front_eos={}, back_eos={}, front_phase={:?}, back_phase={:?}, front_completed={}, back_completed={}",
                            log_context!(self),
                            stream_id,
                            global_stream_id,
                            stream.state,
                            stream.front_received_end_of_stream,
                            stream.back_received_end_of_stream,
                            stream.front.parsing_phase,
                            stream.back.parsing_phase,
                            stream.front.is_completed(),
                            stream.back.is_completed()
                        );
                    }
                }
                // Trace only: nothing is sent here. Presumably the
                // close_notify itself is emitted by `drain_tls_close_notify`
                // below, which receives `close_notify_sent` mutably —
                // NOTE(review): confirm in `shared`.
                if !self.close_notify_sent {
                    trace!("{} H2 SENDING CLOSE NOTIFY", log_context!(self));
                }
                // The helper reports whether output is still pending after the
                // drain and how many rounds it ran; both feed the logs below.
                let (tls_pending_after, drain_rounds) =
                    drain_tls_close_notify(&mut self.socket, &mut self.close_notify_sent);
                if tls_pending_after {
                    // Severity tiering: key on stream-count + close-state, not
                    // peer-vs-operator. Composes with the send-side `H2Error`
                    // variant tier in `goaway()` — both rules demote benign
                    // paths and keep loss-bearing paths loud.
                    //
                    // - `streams != 0`           -> `error!`: live streams at
                    //   close time, response-byte loss is possible.
                    // - `streams == 0` AND state in {GoAway, Error}
                    //                             -> `warn!`: idle close after
                    //   a GOAWAY exchange (peer-initiated abort or our own
                    //   graceful drain). What's stranded is best-effort
                    //   GOAWAY/close_notify; no application data was queued.
                    // - `streams == 0` from any other state
                    //                             -> `error!`: unexpected
                    //   teardown path (no GOAWAY exchange) — keep loud so
                    //   unknown failure modes surface.
                    //
                    // The three calls below share the same message and
                    // arguments; only the log level differs.
                    if !self.streams.is_empty() {
                        error!(
                            "{} TLS buffer NOT fully drained on close: \
                             pending_before={}, pending_after={}, drain_rounds={}, \
                             state={:?}, streams={}, expect_write={:?}, \
                             close_notify_sent={}, readiness={:?}",
                            log_context!(self),
                            tls_pending_before,
                            tls_pending_after,
                            drain_rounds,
                            self.state,
                            self.streams.len(),
                            self.expect_write,
                            self.close_notify_sent,
                            self.readiness
                        );
                    } else if matches!(self.state, H2State::GoAway | H2State::Error) {
                        warn!(
                            "{} TLS buffer NOT fully drained on close: \
                             pending_before={}, pending_after={}, drain_rounds={}, \
                             state={:?}, streams={}, expect_write={:?}, \
                             close_notify_sent={}, readiness={:?}",
                            log_context!(self),
                            tls_pending_before,
                            tls_pending_after,
                            drain_rounds,
                            self.state,
                            self.streams.len(),
                            self.expect_write,
                            self.close_notify_sent,
                            self.readiness
                        );
                    } else {
                        error!(
                            "{} TLS buffer NOT fully drained on close: \
                             pending_before={}, pending_after={}, drain_rounds={}, \
                             state={:?}, streams={}, expect_write={:?}, \
                             close_notify_sent={}, readiness={:?}",
                            log_context!(self),
                            tls_pending_before,
                            tls_pending_after,
                            drain_rounds,
                            self.state,
                            self.streams.len(),
                            self.expect_write,
                            self.close_notify_sent,
                            self.readiness
                        );
                    }
                }
                // Server side stops here: the `end_stream` loop below only
                // runs for client-position connections.
                return;
            }
        }
        // reconnection is handled by the server for each stream separately
        for global_stream_id in self.streams.values() {
            trace!("{} end stream: {}", log_context!(self), global_stream_id);
            // Only streams currently linked to a token are reported to the
            // endpoint; streams in any other state are left untouched here.
            if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
                endpoint.end_stream(token, *global_stream_id, context);
            }
        }
    }
6345
6346    /// Reset a stream: tear down kawa state, emit `RST_STREAM` on the wire,
6347    /// and record MadeYouReset accounting.
6348    ///
6349    /// `wire_stream_id` is the on-wire `StreamId`; `stream_id` is the internal
6350    /// `GlobalStreamId` slot. Callers already carry both so we pass them
6351    /// explicitly rather than scanning `self.streams`. The wire id is threaded
6352    /// into [`Self::enqueue_rst`] which queues the frame for serialisation in
6353    /// [`Self::flush_pending_control_frames`] on the next writable tick —
6354    /// independent of whether the caller immediately evicts the slot via
6355    /// `remove_dead_stream` (which they usually do). This is what guarantees
6356    /// the RST reaches the peer for malformed HEADERS / flow-control /
6357    /// content-length violations flagged by h2spec 2.0.
6358    pub fn reset_stream<E, L>(
6359        &mut self,
6360        wire_stream_id: StreamId,
6361        stream_id: GlobalStreamId,
6362        context: &mut Context<L>,
6363        mut endpoint: E,
6364        error: H2Error,
6365    ) -> MuxResult
6366    where
6367        E: Endpoint,
6368        L: ListenerHandler + L7ListenerHandler,
6369    {
6370        // Compute totals before taking mutable borrows on the target stream.
6371        let reset_byte_totals = self.compute_stream_byte_totals(context);
6372        context.unlink_stream(stream_id);
6373        let stream = &mut context.streams[stream_id];
6374        trace!(
6375            "{} reset H2 stream {}: {:#?}",
6376            log_context!(self),
6377            stream_id,
6378            stream.context
6379        );
6380        let old_state = std::mem::replace(&mut stream.state, StreamState::Unlinked);
6381        forcefully_terminate_answer(stream, &mut self.readiness, error);
6382        let linked_token = if let StreamState::Linked(token) = old_state {
6383            Some(token)
6384        } else {
6385            None
6386        };
6387        let (client_rtt, server_rtt) =
6388            Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
6389        if let Some(token) = linked_token {
6390            endpoint.end_stream(token, stream_id, context);
6391        }
6392        // Emit access log for server-side resets on streams that had active requests
6393        if self.position.is_server()
6394            && matches!(old_state, StreamState::Link | StreamState::Linked(_))
6395        {
6396            let stream = &mut context.streams[stream_id];
6397            self.distribute_overhead(&mut stream.metrics, reset_byte_totals);
6398            stream.metrics.backend_stop();
6399            stream.generate_access_log(
6400                true,
6401                Some("H2::Reset"),
6402                context.listener.clone(),
6403                client_rtt,
6404                server_rtt,
6405            );
6406            stream.metrics.reset();
6407        }
6408        // Queue the RST for wire emission. Independent of the owning stream
6409        // remaining in `self.streams` — callers typically follow this with
6410        // `remove_dead_stream`, which would otherwise evict the slot before
6411        // `write_streams` could run `kawa.prepare` against the converter.
6412        //
6413        // `enqueue_rst` performs every accounting side-effect at queue
6414        // time (per-error counter, global tx counter, CVE-2025-8671
6415        // MadeYouReset lifetime cap). Graceful `NoError` cancels —
6416        // stream recycle, propagated client-side cancel — are exempt
6417        // from the lifetime cap inside the accounting helper itself.
6418        if let Some(result) = self.enqueue_rst(wire_stream_id, error) {
6419            return result;
6420        }
6421        MuxResult::Continue
6422    }
6423
    /// Handles the end of the stream stored in slot `stream_gid`.
    ///
    /// The stream is first unlinked through `context.unlink_stream`; the rest
    /// depends on `self.position`:
    ///
    /// * `Position::Client`: resolves the wire `StreamId` mapped to
    ///   `stream_gid` in `self.streams`. When found, an `RST_STREAM(Cancel)`
    ///   frame is written into `self.zero` unless the stream is already fully
    ///   completed or an RST was already recorded for that id; the stream is
    ///   then removed with `remove_dead_stream` and its slot set to
    ///   `StreamState::Unlinked` unless it is `StreamState::Recycle`. When no
    ///   wire id maps to `stream_gid`, an error is logged.
    /// * `Position::Server`: dispatches on `end_stream_decision(stream)` to
    ///   either forward what was received, close a close-delimited body,
    ///   force-terminate the answer, send a default answer, or queue the
    ///   stream for a new backend link.
    pub fn end_stream<L>(&mut self, stream_gid: GlobalStreamId, context: &mut Context<L>)
    where
        L: ListenerHandler + L7ListenerHandler,
    {
        context.unlink_stream(stream_gid);
        let stream_context = context.http_context(stream_gid);
        trace!(
            "{} end H2 stream {}: {:#?}",
            log_context!(self),
            stream_gid,
            stream_context
        );
        match self.position {
            Position::Client(..) => {
                // Resolve the wire StreamId for this gid up front so the
                // subsequent cleanup does not hold an iterator borrow on
                // `self.streams` while also mutating it.
                let wire_stream_id = self
                    .streams
                    .iter()
                    .find_map(|(&sid, &gid)| (gid == stream_gid).then_some(sid));
                if let Some(id) = wire_stream_id {
                    // Only send RST_STREAM if the stream hasn't fully completed.
                    // If both request and response are terminated, the stream is
                    // already in "closed" state (RFC 9113 §5.1) — sending RST_STREAM
                    // on a closed stream would be a protocol error that could cause
                    // the H2 peer to close the entire connection.
                    let stream = &context.streams[stream_gid];
                    let fully_completed =
                        stream.back_received_end_of_stream && stream.front.is_terminated();
                    // `rst_sent` guards against emitting a second RST for the
                    // same wire id.
                    if !fully_completed && !self.rst_sent.contains(&id) {
                        let kawa = &mut self.zero;
                        // 13 bytes = 9-byte frame header + 4-byte error code
                        // (RFC 9113 §4.1 / §6.4); the whole array is copied below.
                        let mut frame = [0; 13];
                        if let Ok((_, _size)) =
                            serializer::gen_rst_stream(&mut frame, id, H2Error::Cancel)
                        {
                            let buf = kawa.storage.space();
                            // NOTE(review): when the storage has less than 13
                            // free bytes the RST is skipped without any log,
                            // metric, or `rst_sent` entry — confirm this
                            // best-effort behaviour is intended.
                            if buf.len() >= frame.len() {
                                buf[..frame.len()].copy_from_slice(&frame);
                                kawa.storage.fill(frame.len());
                                incr!(names::h2::FRAMES_TX_RST_STREAM);
                                count!(metric_for_rst_stream_sent(H2Error::Cancel), 1);
                                self.readiness.arm_writable();
                                self.rst_sent.insert(id);
                            }
                        }
                    }
                    // Retire the stream and invalidate expect_write/expect_read
                    // if they still reference this gid — the slot may be popped
                    // by `shrink_trailing_recycle` on the next create_stream.
                    self.remove_dead_stream(id, stream_gid);
                    // A slot already marked `Recycle` keeps that state; any
                    // other state is downgraded to `Unlinked`.
                    if context.streams[stream_gid].state != StreamState::Recycle {
                        context.streams[stream_gid].state = StreamState::Unlinked;
                    }
                    return;
                }
                // No wire id maps to this gid on this connection.
                error!(
                    "{} end_stream called for unknown global_stream_id {}",
                    log_context!(self),
                    stream_gid
                );
            }
            Position::Server => {
                // Cloned before `stream` is mutably borrowed from `context`;
                // only the `SendDefault` arm reads it.
                let answers_rc = context.listener.borrow().get_answers().clone();
                let stream = &mut context.streams[stream_gid];
                match end_stream_decision(stream) {
                    // Mark the stream unlinked and arm writable readiness;
                    // the stream's buffers are not modified in this arm.
                    EndStreamAction::ForwardTerminated => {
                        #[cfg(debug_assertions)]
                        context
                            .debug
                            .push(DebugEvent::Str(format!("Close terminated {stream_gid}")));
                        debug!(
                            "{} CLOSING H2 TERMINATED STREAM {} {:?}",
                            log_context!(self),
                            stream_gid,
                            stream
                        );
                        stream.state = StreamState::Unlinked;
                        self.readiness.arm_writable();
                        context.debug.set_interesting(true);
                    }
                    // Append end-of-body / end-of-stream flags to the back
                    // buffer and mark its parsing as terminated before
                    // unlinking.
                    EndStreamAction::CloseDelimited => {
                        debug!(
                            "{} CLOSE DELIMITED H2 STREAM {} {:?}",
                            log_context!(self),
                            stream_gid,
                            stream
                        );
                        stream.back.push_block(kawa::Block::Flags(kawa::Flags {
                            end_body: true,
                            end_chunk: false,
                            end_header: false,
                            end_stream: true,
                        }));
                        stream.back.parsing_phase = kawa::ParsingPhase::Terminated;
                        stream.state = StreamState::Unlinked;
                        self.readiness.arm_writable();
                        context.debug.set_interesting(true);
                    }
                    // Hand the stream to `forcefully_terminate_answer` with
                    // `InternalError`; `stream.state` is not assigned here.
                    EndStreamAction::ForwardUnterminated => {
                        #[cfg(debug_assertions)]
                        context
                            .debug
                            .push(DebugEvent::Str(format!("Close unterminated {stream_gid}")));
                        debug!(
                            "{} CLOSING H2 UNTERMINATED STREAM {} {:?}",
                            log_context!(self),
                            stream_gid,
                            stream
                        );
                        forcefully_terminate_answer(
                            stream,
                            &mut self.readiness,
                            H2Error::InternalError,
                        );
                        context.debug.set_interesting(true);
                    }
                    // Install the default answer for `status` taken from the
                    // listener's answers.
                    EndStreamAction::SendDefault(status) => {
                        #[cfg(debug_assertions)]
                        context.debug.push(DebugEvent::Str(format!(
                            "Can't retry, send {status} on {stream_gid}"
                        )));
                        let answers = answers_rc.borrow();
                        set_default_answer(stream, &mut self.readiness, status, &answers);
                    }
                    // Put the stream back into `Link` state and queue it in
                    // `pending_links`.
                    EndStreamAction::Reconnect => {
                        debug!("{} H2 RECONNECT", log_context!(self));
                        #[cfg(debug_assertions)]
                        context
                            .debug
                            .push(DebugEvent::Str(format!("Retry {stream_gid}")));
                        stream.state = StreamState::Link;
                        context.pending_links.push_back(stream_gid);
                    }
                }
            }
        }
    }
6562
6563    pub fn start_stream<L>(&mut self, stream: GlobalStreamId, _context: &mut Context<L>) -> bool
6564    where
6565        L: ListenerHandler + L7ListenerHandler,
6566    {
6567        // RFC 9113 §6.8: reject new streams on a draining connection
6568        if self.drain.draining {
6569            error!(
6570                "{} Cannot open new stream on draining connection (stream {})",
6571                log_context!(self),
6572                stream
6573            );
6574            return false;
6575        }
6576        // RFC 9113 §5.1.2: respect peer's max concurrent streams limit
6577        if self.streams.len() >= self.peer_settings.settings_max_concurrent_streams as usize {
6578            error!(
6579                "{} Cannot open new stream: active={} >= peer max_concurrent_streams={}",
6580                log_context!(self),
6581                self.streams.len(),
6582                self.peer_settings.settings_max_concurrent_streams
6583            );
6584            return false;
6585        }
6586        trace!(
6587            "{} start new H2 stream {} {:?}",
6588            log_context!(self),
6589            stream,
6590            self.readiness
6591        );
6592        let Some(stream_id) = self.new_stream_id() else {
6593            // Pass 4 Medium #5: the client-initiated stream-ID space
6594            // (31 bits, odd only) is exhausted. The backend is now useless
6595            // for new requests — gracefully drain it. Without this
6596            // transition, the Connection lingers in `Connected` state and
6597            // every subsequent request returns 503 because `start_stream`
6598            // keeps returning false.
6599            //
6600            // The session envelope is hoisted to a local because the
6601            // `match &mut self.position` below holds a mutable borrow on
6602            // `self.position`, and `log_context!(self)` reads that field
6603            // for its `position={...}` slot — calling the macro inside the
6604            // match arms would conflict with the active borrow. The
6605            // bidirectional regression guard in `lib/tests/log_layout.rs`
6606            // (and the matching scanner in `lib/build.rs`) recognises this
6607            // shape by scanning backward as well as forward from each log
6608            // call.
6609            let context = log_context!(self);
6610            match &mut self.position {
6611                Position::Client(cluster_id, backend, status) => {
6612                    let backend_addr = backend.borrow().address;
6613                    let cluster = cluster_id.clone();
6614                    info!(
6615                        "{} H2 backend stream IDs exhausted (cluster={}, backend={:?}) — draining",
6616                        context, cluster, backend_addr
6617                    );
6618                    *status = BackendStatus::Disconnecting;
6619                }
6620                Position::Server => {
6621                    error!(
6622                        "{} H2 server stream IDs exhausted — sending graceful GOAWAY",
6623                        context
6624                    );
6625                }
6626            }
6627            self.graceful_goaway();
6628            return false;
6629        };
6630        self.streams.insert(stream_id, stream);
6631        self.stream_last_activity_at
6632            .insert(stream_id, Instant::now());
6633        self.readiness.arm_writable();
6634        true
6635    }
6636}
6637
6638#[cfg(test)]
6639mod tests {
6640    use std::{cell::RefCell, rc::Rc};
6641
6642    use super::*;
6643    use crate::{pool::Pool, protocol::kawa_h1::editor::HttpContext};
6644
6645    // ── H2FloodDetector ──────────────────────────────────────────────────
6646
6647    #[test]
6648    fn test_flood_detector_no_flood_below_threshold() {
6649        let config = H2FloodConfig::default();
6650        let mut detector = H2FloodDetector::new(config);
6651
6652        // All counters at zero -> no flood
6653        assert!(detector.check_flood().is_none());
6654
6655        // Increment each counter to exactly the threshold (not exceeding)
6656        detector.rst_stream_count = config.max_rst_stream_per_window;
6657        detector.ping_count = config.max_ping_per_window;
6658        detector.settings_count = config.max_settings_per_window;
6659        detector.empty_data_count = config.max_empty_data_per_window;
6660        detector.continuation_count = config.max_continuation_frames;
6661        detector.glitch_count = config.max_glitch_count;
6662        // At threshold but not exceeding -> no flood
6663        assert!(detector.check_flood().is_none());
6664    }
6665
6666    #[test]
6667    fn test_flood_detector_detects_rapid_reset() {
6668        let config = H2FloodConfig::default();
6669        let mut detector = H2FloodDetector::new(config);
6670
6671        detector.rst_stream_count = config.max_rst_stream_per_window + 1;
6672        assert!(matches!(
6673            detector.check_flood(),
6674            Some(H2FloodViolation {
6675                error: H2Error::EnhanceYourCalm,
6676                ..
6677            })
6678        ));
6679    }
6680
6681    #[test]
6682    fn test_flood_detector_detects_ping_flood() {
6683        let config = H2FloodConfig::default();
6684        let mut detector = H2FloodDetector::new(config);
6685
6686        detector.ping_count = config.max_ping_per_window + 1;
6687        assert!(matches!(
6688            detector.check_flood(),
6689            Some(H2FloodViolation {
6690                error: H2Error::EnhanceYourCalm,
6691                ..
6692            })
6693        ));
6694    }
6695
6696    #[test]
6697    fn test_flood_detector_detects_settings_flood() {
6698        let config = H2FloodConfig::default();
6699        let mut detector = H2FloodDetector::new(config);
6700
6701        detector.settings_count = config.max_settings_per_window + 1;
6702        assert!(matches!(
6703            detector.check_flood(),
6704            Some(H2FloodViolation {
6705                error: H2Error::EnhanceYourCalm,
6706                ..
6707            })
6708        ));
6709    }
6710
6711    #[test]
6712    fn test_flood_detector_detects_empty_data_flood() {
6713        let config = H2FloodConfig::default();
6714        let mut detector = H2FloodDetector::new(config);
6715
6716        detector.empty_data_count = config.max_empty_data_per_window + 1;
6717        assert!(matches!(
6718            detector.check_flood(),
6719            Some(H2FloodViolation {
6720                error: H2Error::EnhanceYourCalm,
6721                ..
6722            })
6723        ));
6724    }
6725
6726    #[test]
6727    fn test_flood_detector_detects_continuation_flood() {
6728        let config = H2FloodConfig::default();
6729        let mut detector = H2FloodDetector::new(config);
6730
6731        detector.continuation_count = config.max_continuation_frames + 1;
6732        assert!(matches!(
6733            detector.check_flood(),
6734            Some(H2FloodViolation {
6735                error: H2Error::EnhanceYourCalm,
6736                ..
6737            })
6738        ));
6739    }
6740
6741    #[test]
6742    fn test_flood_detector_detects_header_size_flood() {
6743        let config = H2FloodConfig::default();
6744        let mut detector = H2FloodDetector::new(config);
6745
6746        detector.accumulated_header_size = MAX_HEADER_LIST_SIZE as u32 + 1;
6747        assert!(matches!(
6748            detector.check_flood(),
6749            Some(H2FloodViolation {
6750                error: H2Error::EnhanceYourCalm,
6751                ..
6752            })
6753        ));
6754    }
6755
6756    #[test]
6757    fn test_flood_detector_detects_glitch_flood() {
6758        let config = H2FloodConfig::default();
6759        let mut detector = H2FloodDetector::new(config);
6760
6761        detector.glitch_count = config.max_glitch_count + 1;
6762        assert!(matches!(
6763            detector.check_flood(),
6764            Some(H2FloodViolation {
6765                error: H2Error::EnhanceYourCalm,
6766                ..
6767            })
6768        ));
6769    }
6770
6771    #[test]
6772    fn test_flood_detector_custom_thresholds() {
6773        let config = H2FloodConfig {
6774            max_rst_stream_per_window: 5,
6775            max_ping_per_window: 10,
6776            max_settings_per_window: 3,
6777            max_empty_data_per_window: 8,
6778            max_continuation_frames: 2,
6779            max_glitch_count: 15,
6780            ..H2FloodConfig::default()
6781        };
6782        let mut detector = H2FloodDetector::new(config);
6783
6784        // Below custom threshold -> no flood
6785        detector.rst_stream_count = 5;
6786        assert!(detector.check_flood().is_none());
6787
6788        // Above custom threshold -> flood
6789        detector.rst_stream_count = 6;
6790        assert!(matches!(
6791            detector.check_flood(),
6792            Some(H2FloodViolation {
6793                error: H2Error::EnhanceYourCalm,
6794                ..
6795            })
6796        ));
6797    }
6798
6799    #[test]
6800    fn test_flood_detector_reset_continuation() {
6801        let config = H2FloodConfig::default();
6802        let mut detector = H2FloodDetector::new(config);
6803
6804        detector.continuation_count = 15;
6805        detector.accumulated_header_size = 30000;
6806
6807        detector.reset_continuation();
6808
6809        assert_eq!(detector.continuation_count, 0);
6810        assert_eq!(detector.accumulated_header_size, 0);
6811    }
6812
6813    #[test]
6814    fn test_flood_detector_half_decay_on_window_expiry() {
6815        let config = H2FloodConfig::default();
6816        let mut detector = H2FloodDetector::new(config);
6817
6818        detector.rst_stream_count = 80;
6819        detector.ping_count = 60;
6820        detector.settings_count = 40;
6821        detector.empty_data_count = 20;
6822        detector.window_update_stream0_count = 90;
6823        detector.glitch_count = 50;
6824
6825        // Force window expiry by setting window_start to the past
6826        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6827
6828        // check_flood calls maybe_reset_window which halves counters
6829        let _ = detector.check_flood();
6830
6831        assert_eq!(detector.rst_stream_count, 40);
6832        assert_eq!(detector.ping_count, 30);
6833        assert_eq!(detector.settings_count, 20);
6834        assert_eq!(detector.empty_data_count, 10);
6835        assert_eq!(detector.window_update_stream0_count, 45);
6836        assert_eq!(detector.glitch_count, 25);
6837    }
6838
6839    #[test]
6840    fn test_flood_detector_window_update_stream0_trips_at_threshold() {
6841        let config = H2FloodConfig {
6842            max_window_update_stream0_per_window: 5,
6843            ..H2FloodConfig::default()
6844        };
6845        let mut detector = H2FloodDetector::new(config);
6846
6847        // At threshold — no flood yet (strict greater-than, matches existing counters).
6848        detector.window_update_stream0_count = 5;
6849        assert!(detector.check_flood().is_none());
6850
6851        // Above threshold — flood with the correct violation reason + metric key.
6852        detector.window_update_stream0_count = 6;
6853        let violation = detector
6854            .check_flood()
6855            .expect("WINDOW_UPDATE stream-0 flood must trip above threshold");
6856        assert_eq!(violation.error, H2Error::EnhanceYourCalm);
6857        assert_eq!(violation.reason, "WINDOW_UPDATE stream 0");
6858        assert_eq!(
6859            violation.metric_key,
6860            "h2.flood.violation.window_update_stream0_window"
6861        );
6862        assert_eq!(violation.count, 6);
6863        assert_eq!(violation.threshold, 5);
6864    }
6865
6866    #[test]
6867    fn test_flood_detector_window_update_stream0_honours_default() {
6868        // Default threshold must match the documented constant so operators
6869        // can reason about behaviour without reading code.
6870        let detector = H2FloodDetector::default();
6871        assert_eq!(
6872            detector.config.max_window_update_stream0_per_window,
6873            DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW
6874        );
6875        assert_eq!(detector.window_update_stream0_count, 0);
6876    }
6877
6878    #[test]
6879    fn test_flood_detector_decay_prevents_flood() {
6880        let config = H2FloodConfig {
6881            max_rst_stream_per_window: 10,
6882            ..H2FloodConfig::default()
6883        };
6884        let mut detector = H2FloodDetector::new(config);
6885
6886        // Set counter just above threshold
6887        detector.rst_stream_count = 12;
6888
6889        // Without decay -> flood
6890        assert!(matches!(
6891            detector.check_flood(),
6892            Some(H2FloodViolation {
6893                error: H2Error::EnhanceYourCalm,
6894                ..
6895            })
6896        ));
6897
6898        // Reset and simulate window expiry
6899        detector.rst_stream_count = 12;
6900        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6901
6902        // After decay: 12/2 = 6, which is below threshold 10 -> no flood
6903        assert!(detector.check_flood().is_none());
6904    }
6905
6906    #[test]
6907    fn test_flood_detector_lifetime_rst_cap_triggers_enhance_your_calm() {
6908        // CVE-2023-44487 Rapid Reset: a patient attacker that stays under
6909        // the half-decaying per-window threshold must still be stopped by
6910        // the lifetime cap. Simulate a response-started RST (no abusive
6911        // counter bump) so only the lifetime ceiling is tested.
6912        let mut detector = H2FloodDetector::default();
6913        for _ in 0..DEFAULT_MAX_RST_STREAM_LIFETIME {
6914            assert!(detector.record_rst_lifetime(true).is_none());
6915        }
6916        assert_eq!(
6917            detector.total_rst_received_lifetime,
6918            DEFAULT_MAX_RST_STREAM_LIFETIME
6919        );
6920        assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
6921        // Next RST crosses the ceiling.
6922        assert!(matches!(
6923            detector.record_rst_lifetime(true),
6924            Some(H2FloodViolation {
6925                error: H2Error::EnhanceYourCalm,
6926                ..
6927            })
6928        ));
6929    }
6930
6931    #[test]
6932    fn test_flood_detector_abusive_rst_cap_triggers_first() {
6933        // Pre-response-start RSTs have a much lower ceiling; they trip
6934        // well before the generic lifetime cap.
6935        let mut detector = H2FloodDetector::default();
6936        for _ in 0..DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME {
6937            assert!(detector.record_rst_lifetime(false).is_none());
6938        }
6939        assert_eq!(
6940            detector.total_abusive_rst_received_lifetime,
6941            DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME
6942        );
6943        assert!(matches!(
6944            detector.record_rst_lifetime(false),
6945            Some(H2FloodViolation {
6946                error: H2Error::EnhanceYourCalm,
6947                ..
6948            })
6949        ));
6950    }
6951
6952    #[test]
6953    fn test_flood_detector_emitted_rst_below_threshold_is_clean() {
6954        // Server may legitimately RST some streams (protocol errors,
6955        // client-side abuse caught by other mitigations). Staying at the
6956        // threshold must not trip the ceiling.
6957        let mut detector = H2FloodDetector::default();
6958        for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6959            assert!(detector.record_rst_emitted().is_none());
6960        }
6961        assert_eq!(
6962            detector.total_rst_streams_emitted_lifetime,
6963            DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME
6964        );
6965    }
6966
6967    #[test]
6968    fn test_flood_detector_emitted_rst_cap_triggers_made_you_reset() {
6969        // CVE-2025-8671 MadeYouReset: unbounded server-emitted RST_STREAM is
6970        // a DoS vector equivalent to Rapid Reset with the emission direction
6971        // flipped. Crossing the ceiling must surface a EnhanceYourCalm
6972        // violation so the caller can GOAWAY.
6973        let mut detector = H2FloodDetector::default();
6974        for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6975            assert!(detector.record_rst_emitted().is_none());
6976        }
6977        let violation = detector
6978            .record_rst_emitted()
6979            .expect("emitting past the cap should produce a violation");
6980        assert!(matches!(
6981            violation,
6982            H2FloodViolation {
6983                error: H2Error::EnhanceYourCalm,
6984                reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
6985                ..
6986            }
6987        ));
6988        assert_eq!(violation.count, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME + 1);
6989        assert_eq!(violation.threshold, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME);
6990    }
6991
6992    #[test]
6993    fn test_flood_detector_emitted_rst_counter_does_not_decay() {
6994        // Unlike the windowed rst_stream_count, the emitted lifetime counter
6995        // is strictly monotonic — a patient attacker cannot reset it by
6996        // waiting out a window. maybe_reset_window must NOT touch it.
6997        let mut detector = H2FloodDetector::default();
6998        for _ in 0..10 {
6999            detector.record_rst_emitted();
7000        }
7001        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
7002        // Force a window reset through check_flood.
7003        let _ = detector.check_flood();
7004        assert_eq!(detector.total_rst_streams_emitted_lifetime, 10);
7005    }
7006
7007    /// Every violation kind must carry a metric_key under the agreed
7008    /// `h2.flood.violation.*` namespace, and the keys must be unique. The
7009    /// statsd counter at `handle_flood_violation` reads `violation.metric_key`
7010    /// directly — drift between the construction site and the metric name
7011    /// would silently lose alerting on a CVE mitigation.
7012    #[test]
7013    fn test_flood_violation_metric_keys_are_unique_and_namespaced() {
7014        // Helper: run `record_rst_lifetime` until it trips, returning the metric_key.
7015        fn key_from_rst_lifetime(response_started: bool) -> &'static str {
7016            let mut detector = H2FloodDetector::default();
7017            loop {
7018                if let Some(v) = detector.record_rst_lifetime(response_started) {
7019                    return v.metric_key;
7020                }
7021            }
7022        }
7023
7024        // Helper: run `record_rst_emitted` until it trips, returning the metric_key.
7025        fn key_from_rst_emitted() -> &'static str {
7026            let mut detector = H2FloodDetector::default();
7027            loop {
7028                if let Some(v) = detector.record_rst_emitted() {
7029                    return v.metric_key;
7030                }
7031            }
7032        }
7033
7034        // Helper: drive a single `check_flood` counter past its threshold.
7035        fn key_from_check_flood(setup: impl FnOnce(&mut H2FloodDetector)) -> &'static str {
7036            let mut detector = H2FloodDetector::default();
7037            setup(&mut detector);
7038            detector
7039                .check_flood()
7040                .expect("setup should always trip a flood")
7041                .metric_key
7042        }
7043
7044        let keys: [&'static str; 12] = [
7045            // Lifetime methods on the detector itself.
7046            key_from_rst_lifetime(true),
7047            key_from_rst_lifetime(false),
7048            key_from_rst_emitted(),
7049            // `check_flood` arms.
7050            key_from_check_flood(|d| d.rst_stream_count = u32::MAX),
7051            key_from_check_flood(|d| d.ping_count = u32::MAX),
7052            key_from_check_flood(|d| d.total_ping_received_lifetime = u32::MAX),
7053            key_from_check_flood(|d| d.settings_count = u32::MAX),
7054            key_from_check_flood(|d| d.total_settings_received_lifetime = u32::MAX),
7055            key_from_check_flood(|d| d.empty_data_count = u32::MAX),
7056            key_from_check_flood(|d| d.continuation_count = u32::MAX),
7057            key_from_check_flood(|d| d.accumulated_header_size = u32::MAX),
7058            key_from_check_flood(|d| d.glitch_count = u32::MAX),
7059        ];
7060
7061        for key in keys {
7062            assert!(
7063                key.starts_with("h2.flood.violation."),
7064                "metric key {key} is missing the h2.flood.violation. prefix",
7065            );
7066        }
7067        let mut deduped = keys.to_vec();
7068        deduped.sort_unstable();
7069        deduped.dedup();
7070        assert_eq!(
7071            deduped.len(),
7072            keys.len(),
7073            "metric keys must be unique across violation kinds; collisions: {keys:?}",
7074        );
7075    }
7076
7077    /// All four `metric_for_*` helpers must yield distinct, namespaced keys for
7078    /// every RFC 9113 §7 error code. The macro behind them uses `concat!`, so a
7079    /// new H2Error variant fails the build inside the macro — but a typo in
7080    /// the helper prefix would silently land. Walk every (direction × kind)
7081    /// pair and dedupe the set.
7082    /// `h2_frame_rx_metric_key` must yield a distinct `&'static str` per
7083    /// `Frame::*` variant. The single dispatch site in `handle_frame` reads
7084    /// from this helper, so a typo or duplicate would silently clobber the
7085    /// frame-mix dashboard. Asserting the literal set lets us compare against
7086    /// `doc/configure.md` and the RFC 9113 §6 frame catalogue without
7087    /// reconstructing every Frame variant in the test.
7088    #[test]
7089    fn test_h2_frame_rx_metric_keys_are_unique_and_namespaced() {
7090        // Update this list whenever a new Frame variant is added — the helper
7091        // match is also exhaustive, so the build will already break there
7092        // before anyone notices the test missing a key.
7093        let expected: [&'static str; 11] = [
7094            "h2.frames.rx.data",
7095            "h2.frames.rx.headers",
7096            "h2.frames.rx.push_promise",
7097            "h2.frames.rx.priority",
7098            "h2.frames.rx.rst_stream",
7099            "h2.frames.rx.settings",
7100            "h2.frames.rx.ping",
7101            "h2.frames.rx.goaway",
7102            "h2.frames.rx.window_update",
7103            "h2.frames.rx.continuation",
7104            "h2.frames.rx.unknown",
7105        ];
7106
7107        for key in expected {
7108            assert!(
7109                key.starts_with("h2.frames.rx."),
7110                "metric key {key} is missing the h2.frames.rx. prefix",
7111            );
7112        }
7113        let mut deduped = expected.to_vec();
7114        deduped.sort_unstable();
7115        deduped.dedup();
7116        assert_eq!(
7117            deduped.len(),
7118            expected.len(),
7119            "frame-rx metric keys must be unique; collisions in: {expected:?}",
7120        );
7121
7122        // Spot-check the helper for the one variant we can construct without
7123        // borrowing into a frame body — `Frame::Unknown(u8)` is just a tag.
7124        assert_eq!(
7125            h2_frame_rx_metric_key(&Frame::Unknown(42)),
7126            "h2.frames.rx.unknown",
7127        );
7128    }
7129
7130    #[test]
7131    fn test_per_error_code_metric_keys_are_unique_and_namespaced() {
7132        const ALL_ERRORS: [H2Error; 14] = [
7133            H2Error::NoError,
7134            H2Error::ProtocolError,
7135            H2Error::InternalError,
7136            H2Error::FlowControlError,
7137            H2Error::SettingsTimeout,
7138            H2Error::StreamClosed,
7139            H2Error::FrameSizeError,
7140            H2Error::RefusedStream,
7141            H2Error::Cancel,
7142            H2Error::CompressionError,
7143            H2Error::ConnectError,
7144            H2Error::EnhanceYourCalm,
7145            H2Error::InadequateSecurity,
7146            H2Error::HTTP11Required,
7147        ];
7148
7149        let mut keys: Vec<&'static str> = Vec::new();
7150        for error in ALL_ERRORS {
7151            let code = error as u32;
7152            keys.push(metric_for_goaway_sent(error));
7153            keys.push(metric_for_goaway_received(code));
7154            keys.push(metric_for_rst_stream_sent(error));
7155            keys.push(metric_for_rst_stream_received(code));
7156        }
7157        // …plus the four `unknown_error` fallbacks for codes outside RFC 9113 §7.
7158        let unknown_code = 0xff;
7159        assert!(H2Error::try_from(unknown_code).is_err());
7160        keys.push(metric_for_goaway_received(unknown_code));
7161        keys.push(metric_for_rst_stream_received(unknown_code));
7162        // …and the dedicated Rapid Reset signature counter.
7163        keys.push(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START);
7164
7165        for key in &keys {
7166            assert!(
7167                key.starts_with("h2.goaway.sent.")
7168                    || key.starts_with("h2.goaway.received.")
7169                    || key.starts_with("h2.rst_stream.sent.")
7170                    || key.starts_with("h2.rst_stream.received."),
7171                "metric key {key} does not match a known per-error-code namespace",
7172            );
7173        }
7174        let mut deduped = keys.clone();
7175        deduped.sort_unstable();
7176        deduped.dedup();
7177        assert_eq!(
7178            deduped.len(),
7179            keys.len(),
7180            "per-error-code metric keys must be unique; collisions in: {keys:?}",
7181        );
7182    }
7183
7184    #[test]
7185    fn test_flood_detector_response_started_rst_not_abusive() {
7186        // When the backend response has begun, the RST is cheap for us
7187        // too — it only bumps the generic lifetime counter.
7188        let mut detector = H2FloodDetector::default();
7189        for _ in 0..(DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100) {
7190            assert!(detector.record_rst_lifetime(true).is_none());
7191        }
7192        assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
7193        assert_eq!(
7194            detector.total_rst_received_lifetime,
7195            DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100
7196        );
7197    }
7198
7199    #[test]
7200    fn test_flood_detector_default_matches_new_default() {
7201        let from_default = H2FloodDetector::default();
7202        let from_new = H2FloodDetector::new(H2FloodConfig::default());
7203
7204        assert_eq!(from_default.rst_stream_count, from_new.rst_stream_count);
7205        assert_eq!(from_default.ping_count, from_new.ping_count);
7206        assert_eq!(from_default.settings_count, from_new.settings_count);
7207        assert_eq!(from_default.empty_data_count, from_new.empty_data_count);
7208        assert_eq!(from_default.continuation_count, from_new.continuation_count);
7209        assert_eq!(
7210            from_default.accumulated_header_size,
7211            from_new.accumulated_header_size
7212        );
7213        assert_eq!(from_default.glitch_count, from_new.glitch_count);
7214        assert_eq!(from_default.config, from_new.config);
7215    }
7216
7217    // ── Prioriser ────────────────────────────────────────────────────────
7218
7219    #[test]
7220    fn test_prioriser_defaults_for_unknown_stream() {
7221        let p = Prioriser::default();
7222        // Unknown stream -> RFC 9218 defaults: urgency 3, incremental false
7223        assert_eq!(p.get(&1), (3, false));
7224        assert_eq!(p.get(&999), (3, false));
7225    }
7226
7227    #[test]
7228    fn test_prioriser_push_rfc9218_and_get() {
7229        let mut p = Prioriser::default();
7230
7231        let invalid = p.push_priority(
7232            1,
7233            parser::PriorityPart::Rfc9218 {
7234                urgency: 0,
7235                incremental: true,
7236            },
7237        );
7238        assert!(!invalid);
7239        assert_eq!(p.get(&1), (0, true));
7240
7241        let invalid = p.push_priority(
7242            3,
7243            parser::PriorityPart::Rfc9218 {
7244                urgency: 7,
7245                incremental: false,
7246            },
7247        );
7248        assert!(!invalid);
7249        assert_eq!(p.get(&3), (7, false));
7250    }
7251
7252    #[test]
7253    fn test_prioriser_urgency_clamped_to_7() {
7254        let mut p = Prioriser::default();
7255
7256        p.push_priority(
7257            1,
7258            parser::PriorityPart::Rfc9218 {
7259                urgency: 255,
7260                incremental: false,
7261            },
7262        );
7263        assert_eq!(p.get(&1), (7, false));
7264    }
7265
7266    #[test]
7267    fn test_prioriser_update_priority() {
7268        let mut p = Prioriser::default();
7269
7270        p.push_priority(
7271            1,
7272            parser::PriorityPart::Rfc9218 {
7273                urgency: 3,
7274                incremental: false,
7275            },
7276        );
7277        assert_eq!(p.get(&1), (3, false));
7278
7279        // Update same stream
7280        p.push_priority(
7281            1,
7282            parser::PriorityPart::Rfc9218 {
7283                urgency: 1,
7284                incremental: true,
7285            },
7286        );
7287        assert_eq!(p.get(&1), (1, true));
7288    }
7289
7290    #[test]
7291    fn test_prioriser_remove() {
7292        let mut p = Prioriser::default();
7293
7294        p.push_priority(
7295            1,
7296            parser::PriorityPart::Rfc9218 {
7297                urgency: 0,
7298                incremental: true,
7299            },
7300        );
7301        assert_eq!(p.get(&1), (0, true));
7302
7303        p.remove(&1);
7304        // After removal, falls back to defaults
7305        assert_eq!(p.get(&1), (3, false));
7306    }
7307
7308    #[test]
7309    fn test_prioriser_rfc7540_self_dependency() {
7310        let mut p = Prioriser::default();
7311
7312        // Self-dependency should return true (invalid)
7313        let invalid = p.push_priority(
7314            5,
7315            parser::PriorityPart::Rfc7540 {
7316                stream_dependency: parser::StreamDependency {
7317                    exclusive: false,
7318                    stream_id: 5, // same as stream_id
7319                },
7320                weight: 16,
7321            },
7322        );
7323        assert!(invalid);
7324    }
7325
7326    #[test]
7327    fn test_prioriser_rfc7540_valid_dependency() {
7328        let mut p = Prioriser::default();
7329
7330        // Non-self dependency is valid (but ignored for scheduling)
7331        let invalid = p.push_priority(
7332            5,
7333            parser::PriorityPart::Rfc7540 {
7334                stream_dependency: parser::StreamDependency {
7335                    exclusive: false,
7336                    stream_id: 3, // different stream
7337                },
7338                weight: 16,
7339            },
7340        );
7341        assert!(!invalid);
7342        // Still returns defaults since RFC 7540 priority is ignored
7343        assert_eq!(p.get(&5), (3, false));
7344    }
7345
7346    #[test]
7347    fn test_prioriser_max_entries_cap() {
7348        let mut p = Prioriser::default();
7349
7350        // Fill up to MAX_PRIORITIES
7351        for i in 0..MAX_PRIORITIES as u32 {
7352            let stream_id = i * 2 + 1; // odd stream IDs
7353            p.push_priority(
7354                stream_id,
7355                parser::PriorityPart::Rfc9218 {
7356                    urgency: (i % 8) as u8,
7357                    incremental: false,
7358                },
7359            );
7360        }
7361
7362        // Next insert for a new stream should be silently rejected
7363        let next_id = (MAX_PRIORITIES as u32) * 2 + 1;
7364        let invalid = p.push_priority(
7365            next_id,
7366            parser::PriorityPart::Rfc9218 {
7367                urgency: 0,
7368                incremental: true,
7369            },
7370        );
7371        assert!(!invalid); // not a protocol error, just silently dropped
7372        assert_eq!(p.get(&next_id), (3, false)); // defaults, not stored
7373    }
7374
7375    #[test]
7376    fn test_prioriser_update_existing_at_cap() {
7377        let mut p = Prioriser::default();
7378
7379        // Fill to cap
7380        for i in 0..MAX_PRIORITIES as u32 {
7381            p.push_priority(
7382                i * 2 + 1,
7383                parser::PriorityPart::Rfc9218 {
7384                    urgency: 3,
7385                    incremental: false,
7386                },
7387            );
7388        }
7389
7390        // Updating an existing entry should still work even at cap
7391        p.push_priority(
7392            1,
7393            parser::PriorityPart::Rfc9218 {
7394                urgency: 0,
7395                incremental: true,
7396            },
7397        );
7398        assert_eq!(p.get(&1), (0, true));
7399    }
7400
7401    #[test]
7402    fn test_prioriser_guarded_accepts_open_stream() {
7403        let mut p = Prioriser::default();
7404        let mut open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7405        open.insert(3, 0);
7406        let invalid = p.push_priority_guarded(
7407            3,
7408            parser::PriorityPart::Rfc9218 {
7409                urgency: 1,
7410                incremental: false,
7411            },
7412            7,
7413            &open,
7414        );
7415        assert!(!invalid);
7416        assert_eq!(p.get(&3), (1, false));
7417    }
7418
7419    #[test]
7420    fn test_prioriser_guarded_accepts_idle_lookahead() {
7421        let mut p = Prioriser::default();
7422        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7423        // Just ahead of last_stream_id, within PRIORITY_IDLE_LOOKAHEAD.
7424        let invalid = p.push_priority_guarded(
7425            105,
7426            parser::PriorityPart::Rfc9218 {
7427                urgency: 2,
7428                incremental: true,
7429            },
7430            99,
7431            &open,
7432        );
7433        assert!(!invalid);
7434        assert_eq!(p.get(&105), (2, true));
7435    }
7436
7437    #[test]
7438    fn test_prioriser_guarded_drops_far_future_stream() {
7439        let mut p = Prioriser::default();
7440        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7441        // Beyond the 64-slot lookahead window.
7442        let invalid = p.push_priority_guarded(
7443            1_000_001,
7444            parser::PriorityPart::Rfc9218 {
7445                urgency: 0,
7446                incremental: false,
7447            },
7448            3,
7449            &open,
7450        );
7451        assert!(!invalid); // not a protocol error, just dropped
7452        // Default priority returned — no entry stored.
7453        assert_eq!(p.get(&1_000_001), (DEFAULT_URGENCY, false));
7454    }
7455
7456    #[test]
7457    fn test_prioriser_guarded_drops_closed_past_stream() {
7458        let mut p = Prioriser::default();
7459        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7460        // Past the counter and not open = already closed. Drop.
7461        let invalid = p.push_priority_guarded(
7462            3,
7463            parser::PriorityPart::Rfc9218 {
7464                urgency: 5,
7465                incremental: false,
7466            },
7467            99,
7468            &open,
7469        );
7470        assert!(!invalid);
7471        assert_eq!(p.get(&3), (DEFAULT_URGENCY, false));
7472    }
7473
7474    #[test]
7475    fn test_prioriser_guarded_cannot_flood_with_far_ids() {
7476        // Previously an attacker could pack MAX_PRIORITIES entries by picking
7477        // far-future stream IDs. The guard rejects them before the cap helps.
7478        let mut p = Prioriser::default();
7479        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7480        for delta in 10_000..(10_000 + MAX_PRIORITIES as u32) {
7481            p.push_priority_guarded(
7482                delta,
7483                parser::PriorityPart::Rfc9218 {
7484                    urgency: 0,
7485                    incremental: false,
7486                },
7487                0,
7488                &open,
7489            );
7490        }
7491        assert_eq!(p.priorities.len(), 0);
7492    }
7493
7494    // ── RFC 9218 §4 round-robin rotation ───────────────────────────────
7495
7496    /// Helper: mark `stream_id` as (urgency, incremental) in the map.
7497    fn set_prio(p: &mut Prioriser, stream_id: StreamId, urgency: u8, incremental: bool) {
7498        p.push_priority(
7499            stream_id,
7500            parser::PriorityPart::Rfc9218 {
7501                urgency,
7502                incremental,
7503            },
7504        );
7505    }
7506
7507    #[test]
7508    fn test_apply_incremental_rotation_all_non_incremental_is_noop() {
7509        // Non-incremental streams keep the existing (urgency, stream_id) sort.
7510        let mut p = Prioriser::default();
7511        set_prio(&mut p, 1, 3, false);
7512        set_prio(&mut p, 3, 3, false);
7513        set_prio(&mut p, 5, 3, false);
7514
7515        let mut buf = vec![1u32, 3, 5];
7516        let count = p.apply_incremental_rotation(&mut buf);
7517        assert_eq!(count, 0);
7518        assert_eq!(buf, vec![1, 3, 5]);
7519    }
7520
7521    #[test]
7522    fn test_apply_incremental_rotation_moves_incremental_to_tail() {
7523        // Within a same-urgency bucket non-incremental must come before
7524        // incremental, each subrange staying ascending.
7525        let mut p = Prioriser::default();
7526        set_prio(&mut p, 1, 3, true);
7527        set_prio(&mut p, 3, 3, false);
7528        set_prio(&mut p, 5, 3, true);
7529        set_prio(&mut p, 7, 3, false);
7530
7531        let mut buf = vec![1u32, 3, 5, 7];
7532        let count = p.apply_incremental_rotation(&mut buf);
7533        assert_eq!(count, 2);
7534        // Non-incremental first (3, 7), then incremental (1, 5) — ascending
7535        // within each subrange before the cursor rotation.
7536        assert_eq!(buf, vec![3, 7, 1, 5]);
7537    }
7538
7539    #[test]
7540    fn test_apply_incremental_rotation_respects_urgency_buckets() {
7541        // Different urgency buckets must not be mixed.
7542        let mut p = Prioriser::default();
7543        set_prio(&mut p, 1, 0, true); // urgent incremental
7544        set_prio(&mut p, 3, 3, false); // default non-incremental
7545        set_prio(&mut p, 5, 3, true); // default incremental
7546        set_prio(&mut p, 7, 5, false); // low-priority non-incremental
7547
7548        // Input is pre-sorted by (urgency, id) as the scheduler does.
7549        let mut buf = vec![1u32, 3, 5, 7];
7550        let count = p.apply_incremental_rotation(&mut buf);
7551        assert_eq!(count, 2);
7552        // Bucket 0: [1] (alone, stays). Bucket 3: [3] non-inc, [5] inc.
7553        // Bucket 5: [7] alone. Cross-bucket order is preserved.
7554        assert_eq!(buf, vec![1, 3, 5, 7]);
7555    }
7556
7557    #[test]
7558    fn test_apply_incremental_rotation_rotates_by_cursor() {
7559        // Three same-urgency incremental streams: cursor advancement shifts
7560        // the bucket so the next pass starts after the previously fired ID.
7561        let mut p = Prioriser::default();
7562        set_prio(&mut p, 1, 3, true);
7563        set_prio(&mut p, 3, 3, true);
7564        set_prio(&mut p, 5, 3, true);
7565
7566        let base = vec![1u32, 3, 5];
7567
7568        // Pass 1: cursor is 0 (initial), so order stays 1, 3, 5.
7569        let mut buf = base.clone();
7570        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
7571        assert_eq!(buf, vec![1, 3, 5]);
7572        p.advance_incremental_cursor(Some(1));
7573
7574        // Pass 2: cursor is 1, rotate so 3 comes first.
7575        let mut buf = base.clone();
7576        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
7577        assert_eq!(buf, vec![3, 5, 1]);
7578        p.advance_incremental_cursor(Some(3));
7579
7580        // Pass 3: cursor is 3, rotate so 5 comes first.
7581        let mut buf = base.clone();
7582        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
7583        assert_eq!(buf, vec![5, 1, 3]);
7584        p.advance_incremental_cursor(Some(5));
7585
7586        // Pass 4: cursor is 5 (largest in bucket), wrap to 1.
7587        let mut buf = base;
7588        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
7589        assert_eq!(buf, vec![1, 3, 5]);
7590    }
7591
7592    #[test]
7593    fn test_apply_incremental_rotation_cursor_unknown_id() {
7594        // Cursor points at an ID no longer active (stream completed). Rotation
7595        // should still start from the smallest ID greater than the cursor.
7596        let mut p = Prioriser::default();
7597        set_prio(&mut p, 3, 3, true);
7598        set_prio(&mut p, 5, 3, true);
7599        set_prio(&mut p, 7, 3, true);
7600        p.advance_incremental_cursor(Some(4)); // 4 is not in the bucket
7601
7602        let mut buf = vec![3u32, 5, 7];
7603        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
7604        assert_eq!(buf, vec![5, 7, 3]);
7605    }
7606
7607    #[test]
7608    fn test_apply_incremental_rotation_single_stream_buckets() {
7609        // Single-stream buckets are a degenerate fast path: no reordering.
7610        let mut p = Prioriser::default();
7611        set_prio(&mut p, 1, 1, true);
7612        set_prio(&mut p, 3, 2, false);
7613        set_prio(&mut p, 5, 3, true);
7614
7615        let mut buf = vec![1u32, 3, 5];
7616        let count = p.apply_incremental_rotation(&mut buf);
7617        assert_eq!(count, 2);
7618        assert_eq!(buf, vec![1, 3, 5]);
7619    }
7620
7621    #[test]
7622    fn test_advance_incremental_cursor_none_is_noop() {
7623        // If no incremental stream fires (only non-incremental served), the
7624        // cursor must stay put so fairness is preserved for the next pass.
7625        let mut p = Prioriser::default();
7626        p.advance_incremental_cursor(Some(5));
7627        p.advance_incremental_cursor(None);
7628        assert_eq!(p.incremental_cursor, 5);
7629    }
7630
7631    #[test]
7632    fn test_apply_incremental_rotation_mixed_bucket_with_cursor() {
7633        // Same-urgency bucket with a mix: non-inc served first in ascending
7634        // order, then the incremental tail rotated by cursor.
7635        let mut p = Prioriser::default();
7636        set_prio(&mut p, 1, 3, true);
7637        set_prio(&mut p, 3, 3, false);
7638        set_prio(&mut p, 5, 3, true);
7639        set_prio(&mut p, 7, 3, false);
7640        set_prio(&mut p, 9, 3, true);
7641        p.advance_incremental_cursor(Some(5));
7642
7643        let mut buf = vec![1u32, 3, 5, 7, 9];
7644        let count = p.apply_incremental_rotation(&mut buf);
7645        assert_eq!(count, 3);
7646        // Non-inc (3, 7) first, then incremental rotated: cursor 5 means
7647        // next-after-5 = 9, then 1, then 5 (wrap).
7648        assert_eq!(buf, vec![3, 7, 9, 1, 5]);
7649    }
7650
7651    // ── H2FlowControl ───────────────────────────────────────────────────
7652
7653    #[test]
7654    fn test_flow_control_initial_state() {
7655        let fc = H2FlowControl {
7656            window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
7657            received_bytes_since_update: 0,
7658            pending_window_updates: HashMap::new(),
7659        };
7660        assert_eq!(fc.window, 65535);
7661        assert_eq!(fc.received_bytes_since_update, 0);
7662        assert!(fc.pending_window_updates.is_empty());
7663    }
7664
7665    #[test]
7666    fn test_flow_control_window_update_coalescing() {
7667        let mut updates: HashMap<u32, u32> = HashMap::new();
7668
7669        // First update for stream 1
7670        updates.insert(1, 1000);
7671        assert_eq!(*updates.get(&1).unwrap(), 1000);
7672
7673        // Coalesce second update for same stream
7674        if let Some(existing) = updates.get_mut(&1) {
7675            *existing = existing.saturating_add(500).min(i32::MAX as u32);
7676        }
7677        assert_eq!(*updates.get(&1).unwrap(), 1500);
7678
7679        // Different stream gets its own entry
7680        updates.insert(3, 2000);
7681        assert_eq!(updates.len(), 2);
7682        assert_eq!(*updates.get(&3).unwrap(), 2000);
7683    }
7684
7685    #[test]
7686    fn test_flow_control_window_update_saturation() {
7687        let mut updates: HashMap<u32, u32> = HashMap::new();
7688
7689        // Insert near max and coalesce — should saturate to i32::MAX
7690        let max_increment = i32::MAX as u32;
7691        updates.insert(1, max_increment - 100);
7692        if let Some(existing) = updates.get_mut(&1) {
7693            *existing = existing.saturating_add(200).min(max_increment);
7694        }
7695        assert_eq!(*updates.get(&1).unwrap(), max_increment);
7696    }
7697
7698    #[test]
7699    fn test_flow_control_connection_window_can_go_negative() {
7700        // RFC 9113 §6.9.2: connection-level window can go negative
7701        let mut fc = H2FlowControl {
7702            window: 100,
7703            received_bytes_since_update: 0,
7704            pending_window_updates: HashMap::new(),
7705        };
7706
7707        // Simulate consuming more than available
7708        fc.window -= 200;
7709        assert_eq!(fc.window, -100);
7710    }
7711
7712    // ── H2FloodConfig ───────────────────────────────────────────────────
7713
7714    #[test]
7715    fn test_flood_config_default_values() {
7716        let config = H2FloodConfig::default();
7717        assert_eq!(config.max_rst_stream_per_window, 100);
7718        assert_eq!(config.max_ping_per_window, 100);
7719        assert_eq!(config.max_settings_per_window, 50);
7720        assert_eq!(config.max_empty_data_per_window, 100);
7721        assert_eq!(config.max_continuation_frames, 20);
7722        assert_eq!(config.max_glitch_count, 100);
7723        assert_eq!(config.max_rst_stream_lifetime, 10_000);
7724        assert_eq!(config.max_rst_stream_abusive_lifetime, 50);
7725        assert_eq!(config.max_header_list_size, MAX_HEADER_LIST_SIZE as u32);
7726    }
7727
7728    // ── distribute_overhead ─────────────────────────────────────────────
7729
7730    #[test]
7731    fn test_distribute_overhead_proportional() {
7732        let mut metrics = SessionMetrics::new(None);
7733        let mut overhead_bin = 1000;
7734        let mut overhead_bout = 500;
7735
7736        // Stream transferred 60% of total bytes (not last stream)
7737        distribute_overhead(
7738            &mut metrics,
7739            &mut overhead_bin,
7740            &mut overhead_bout,
7741            (600, 300),  // stream_bytes
7742            (1000, 500), // total_bytes
7743            2,           // active_streams
7744            false,       // is_last_stream
7745        );
7746
7747        assert_eq!(metrics.bin, 600); // 60% of 1000
7748        assert_eq!(metrics.bout, 300); // 60% of 500
7749        assert_eq!(overhead_bin, 400); // 1000 - 600
7750        assert_eq!(overhead_bout, 200); // 500 - 300
7751    }
7752
7753    #[test]
7754    fn test_distribute_overhead_even_split_when_no_bytes() {
7755        let mut metrics = SessionMetrics::new(None);
7756        let mut overhead_bin = 100;
7757        let mut overhead_bout = 200;
7758
7759        // No bytes transferred -> even distribution (not last stream)
7760        distribute_overhead(
7761            &mut metrics,
7762            &mut overhead_bin,
7763            &mut overhead_bout,
7764            (0, 0), // stream_bytes
7765            (0, 0), // total_bytes
7766            4,      // active_streams
7767            false,  // is_last_stream
7768        );
7769
7770        assert_eq!(metrics.bin, 25); // 100 / 4
7771        assert_eq!(metrics.bout, 50); // 200 / 4
7772        assert_eq!(overhead_bin, 75);
7773        assert_eq!(overhead_bout, 150);
7774    }
7775
7776    #[test]
7777    fn test_distribute_overhead_clamps_to_remaining() {
7778        let mut metrics = SessionMetrics::new(None);
7779        let mut overhead_bin = 10;
7780        let mut overhead_bout = 10;
7781
7782        // Stream claims 100% of bytes but overhead is small (last stream)
7783        distribute_overhead(
7784            &mut metrics,
7785            &mut overhead_bin,
7786            &mut overhead_bout,
7787            (1000, 1000), // stream_bytes
7788            (1000, 1000), // total_bytes
7789            1,            // active_streams
7790            true,         // is_last_stream
7791        );
7792
7793        assert_eq!(metrics.bin, 10);
7794        assert_eq!(metrics.bout, 10);
7795        assert_eq!(overhead_bin, 0);
7796        assert_eq!(overhead_bout, 0);
7797    }
7798
7799    #[test]
7800    fn test_distribute_overhead_zero_active_streams() {
7801        let mut metrics = SessionMetrics::new(None);
7802        let mut overhead_bin = 100;
7803        let mut overhead_bout = 100;
7804
7805        // 0 active streams (edge case) — last stream gets all remainder
7806        distribute_overhead(
7807            &mut metrics,
7808            &mut overhead_bin,
7809            &mut overhead_bout,
7810            (0, 0),
7811            (0, 0),
7812            0,
7813            true,
7814        );
7815
7816        assert_eq!(metrics.bin, 100); // last stream gets all remaining
7817        assert_eq!(metrics.bout, 100);
7818        assert_eq!(overhead_bin, 0);
7819        assert_eq!(overhead_bout, 0);
7820    }
7821
7822    #[test]
7823    fn test_distribute_overhead_last_stream_gets_remainder() {
7824        let mut metrics1 = SessionMetrics::new(None);
7825        let mut metrics2 = SessionMetrics::new(None);
7826        let mut overhead_bin = 120;
7827        let mut overhead_bout = 120;
7828
7829        // First stream (not last): gets proportional share
7830        distribute_overhead(
7831            &mut metrics1,
7832            &mut overhead_bin,
7833            &mut overhead_bout,
7834            (100, 100), // stream_bytes
7835            (300, 300), // total_bytes
7836            3,          // active_streams
7837            false,      // is_last_stream
7838        );
7839
7840        let remaining_bin = overhead_bin;
7841        let remaining_bout = overhead_bout;
7842
7843        // Last stream: gets ALL remaining overhead (no rounding loss)
7844        distribute_overhead(
7845            &mut metrics2,
7846            &mut overhead_bin,
7847            &mut overhead_bout,
7848            (100, 100), // stream_bytes
7849            (300, 300), // total_bytes
7850            3,          // active_streams
7851            true,       // is_last_stream
7852        );
7853
7854        assert_eq!(metrics2.bin, remaining_bin);
7855        assert_eq!(metrics2.bout, remaining_bout);
7856        assert_eq!(overhead_bin, 0, "no remainder bytes should be lost");
7857        assert_eq!(overhead_bout, 0, "no remainder bytes should be lost");
7858    }
7859
7860    // ── H2FlowControl (additional edge cases) ─────────────────────────
7861
7862    #[test]
7863    fn test_flow_control_queue_window_update_cap() {
7864        // Verify DEFAULT_MAX_PENDING_WINDOW_UPDATES reflects 1 + 4*MAX_CONCURRENT_STREAMS
7865        assert_eq!(DEFAULT_MAX_PENDING_WINDOW_UPDATES, 1 + 100 * 4);
7866
7867        // Simulate queue reaching capacity
7868        let cap = DEFAULT_MAX_PENDING_WINDOW_UPDATES;
7869        let mut updates: HashMap<u32, u32> = HashMap::new();
7870        for i in 0..cap as u32 {
7871            updates.insert(i, 1000);
7872        }
7873        assert_eq!(updates.len(), cap);
7874
7875        // A new stream ID beyond capacity should be rejected
7876        let next_stream = cap as u32;
7877        let at_cap = updates.len() >= cap;
7878        assert!(at_cap);
7879        assert!(!updates.contains_key(&next_stream));
7880
7881        // Verify custom max_concurrent_streams produces proportional cap
7882        let custom_cap = 1 + 500_usize * 4;
7883        assert_eq!(custom_cap, 2001);
7884    }
7885
7886    #[test]
7887    fn test_h2_connection_config_defaults() {
7888        let config = H2ConnectionConfig::default();
7889        assert_eq!(config.initial_connection_window, ENLARGED_CONNECTION_WINDOW);
7890        assert_eq!(
7891            config.max_concurrent_streams,
7892            DEFAULT_MAX_CONCURRENT_STREAMS
7893        );
7894        assert_eq!(config.stream_shrink_ratio, 2);
7895    }
7896
7897    #[test]
7898    fn test_h2_connection_config_clamp_window_lower_bound() {
7899        // Below minimum: clamped to DEFAULT_INITIAL_WINDOW_SIZE (65535)
7900        let config = H2ConnectionConfig::new(100, 100, 2);
7901        assert_eq!(
7902            config.initial_connection_window,
7903            DEFAULT_INITIAL_WINDOW_SIZE
7904        );
7905    }
7906
7907    #[test]
7908    fn test_h2_connection_config_clamp_window_upper_bound() {
7909        // Above maximum: clamped to FLOW_CONTROL_MAX_WINDOW (2^31-1)
7910        let config = H2ConnectionConfig::new(u32::MAX, 100, 2);
7911        assert_eq!(config.initial_connection_window, FLOW_CONTROL_MAX_WINDOW);
7912    }
7913
7914    #[test]
7915    fn test_h2_connection_config_clamp_window_exact_minimum() {
7916        // Exactly minimum: no clamping, no zero-increment WINDOW_UPDATE risk
7917        let config = H2ConnectionConfig::new(DEFAULT_INITIAL_WINDOW_SIZE, 100, 2);
7918        assert_eq!(
7919            config.initial_connection_window,
7920            DEFAULT_INITIAL_WINDOW_SIZE
7921        );
7922        // Increment to send would be 0 — the code guards this with `if increment > 0`
7923        let increment = config
7924            .initial_connection_window
7925            .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
7926        assert_eq!(increment, 0);
7927    }
7928
7929    #[test]
7930    fn test_h2_connection_config_clamp_shrink_ratio() {
7931        // Below minimum: clamped to 2 (1 would defeat recycling)
7932        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, 0);
7933        assert_eq!(config.stream_shrink_ratio, 2);
7934        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, 1);
7935        assert_eq!(config.stream_shrink_ratio, 2);
7936    }
7937
7938    #[test]
7939    fn test_h2_connection_config_clamp_concurrent_streams() {
7940        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 0, 2);
7941        assert_eq!(config.max_concurrent_streams, 1);
7942    }
7943
7944    #[test]
7945    fn test_h2_connection_config_from_optional_uses_defaults() {
7946        let config = H2ConnectionConfig::from_optional(None, None, None);
7947        let defaults = H2ConnectionConfig::default();
7948        assert_eq!(config, defaults);
7949    }
7950
7951    #[test]
7952    fn test_h2_connection_config_from_optional_overrides() {
7953        let config = H2ConnectionConfig::from_optional(Some(2_000_000), Some(500), Some(4));
7954        assert_eq!(config.initial_connection_window, 2_000_000);
7955        assert_eq!(config.max_concurrent_streams, 500);
7956        assert_eq!(config.stream_shrink_ratio, 4);
7957    }
7958
7959    #[test]
7960    fn test_flow_control_window_settings_change_negative() {
7961        // RFC 9113 §6.9.2: A change to SETTINGS_INITIAL_WINDOW_SIZE can cause
7962        // the flow-control window to become negative.
7963        let mut fc = H2FlowControl {
7964            window: 100,
7965            received_bytes_since_update: 0,
7966            pending_window_updates: HashMap::new(),
7967        };
7968
7969        // Simulate SETTINGS_INITIAL_WINDOW_SIZE reduction:
7970        // old_initial = 65535, new_initial = 10 => delta = 10 - 65535 = -65525
7971        let old_initial: i32 = DEFAULT_INITIAL_WINDOW_SIZE as i32;
7972        let new_initial: i32 = 10;
7973        let delta = new_initial - old_initial; // -65525
7974        fc.window += delta;
7975
7976        assert!(
7977            fc.window < 0,
7978            "Window must be able to go negative after settings change"
7979        );
7980        assert_eq!(fc.window, 100 + (10 - 65535));
7981    }
7982
7983    #[test]
7984    fn test_flow_control_coalesce_saturates_at_max_increment() {
7985        let max_increment = i32::MAX as u32;
7986        let mut updates: HashMap<u32, u32> = HashMap::new();
7987
7988        // Insert at max and try to coalesce more
7989        updates.insert(1, max_increment);
7990        if let Some(existing) = updates.get_mut(&1) {
7991            *existing = existing.saturating_add(1000).min(max_increment);
7992        }
7993        assert_eq!(*updates.get(&1).unwrap(), max_increment);
7994    }
7995
7996    // ── H2FloodConfig (additional) ───────────────────────────────────
7997
7998    #[test]
7999    fn test_flood_config_default_matches_constants() {
8000        let config = H2FloodConfig::default();
8001        assert_eq!(
8002            config.max_rst_stream_per_window,
8003            DEFAULT_MAX_RST_STREAM_PER_WINDOW
8004        );
8005        assert_eq!(config.max_ping_per_window, DEFAULT_MAX_PING_PER_WINDOW);
8006        assert_eq!(
8007            config.max_settings_per_window,
8008            DEFAULT_MAX_SETTINGS_PER_WINDOW
8009        );
8010        assert_eq!(
8011            config.max_empty_data_per_window,
8012            DEFAULT_MAX_EMPTY_DATA_PER_WINDOW
8013        );
8014        assert_eq!(
8015            config.max_continuation_frames,
8016            DEFAULT_MAX_CONTINUATION_FRAMES
8017        );
8018        assert_eq!(config.max_glitch_count, DEFAULT_MAX_GLITCH_COUNT);
8019    }
8020
8021    #[test]
8022    fn test_flood_config_equality() {
8023        let config_a = H2FloodConfig::default();
8024        let config_b = H2FloodConfig::default();
8025        assert_eq!(config_a, config_b);
8026
8027        let config_c = H2FloodConfig {
8028            max_rst_stream_per_window: 1,
8029            ..H2FloodConfig::default()
8030        };
8031        assert_ne!(config_a, config_c);
8032    }
8033
8034    // ── distribute_overhead (additional edge cases) ───────────────────
8035
8036    #[test]
8037    fn test_distribute_overhead_asymmetric_in_out() {
8038        let mut metrics = SessionMetrics::new(None);
8039        let mut overhead_bin = 1000;
8040        let mut overhead_bout = 1000;
8041
8042        // Stream transferred 100% inbound, 0% outbound (not last stream)
8043        distribute_overhead(
8044            &mut metrics,
8045            &mut overhead_bin,
8046            &mut overhead_bout,
8047            (500, 0),   // stream_bytes
8048            (500, 100), // total_bytes
8049            2,          // active_streams
8050            false,      // is_last_stream
8051        );
8052
8053        assert_eq!(metrics.bin, 1000); // 100% of inbound overhead
8054        assert_eq!(metrics.bout, 0); // 0% of outbound overhead
8055        assert_eq!(overhead_bin, 0);
8056        assert_eq!(overhead_bout, 1000);
8057    }
8058
8059    #[test]
8060    fn test_distribute_overhead_many_streams_accumulate() {
8061        let mut metrics = SessionMetrics::new(None);
8062        let mut overhead_bin = 120;
8063        let mut overhead_bout = 120;
8064
8065        // Three equal streams, each calling distribute_overhead.
8066        // With is_last_stream on the third call, the last stream gets all
8067        // remaining overhead, so no rounding loss occurs.
8068        //   call 1: 120 * 100/300 = 40 -> remaining 80
8069        //   call 2:  80 * 100/300 = 26 -> remaining 54
8070        //   call 3: last stream gets all remaining = 54
8071        // Total distributed: 40 + 26 + 54 = 120 (no loss)
8072        for i in 0..3 {
8073            distribute_overhead(
8074                &mut metrics,
8075                &mut overhead_bin,
8076                &mut overhead_bout,
8077                (100, 100), // stream_bytes
8078                (300, 300), // total_bytes
8079                3,          // active_streams
8080                i == 2,     // is_last_stream on final call
8081            );
8082        }
8083
8084        assert_eq!(metrics.bin, 120);
8085        assert_eq!(metrics.bout, 120);
8086        // No rounding residual — last stream absorbed the remainder
8087        assert_eq!(overhead_bin, 0);
8088        assert_eq!(overhead_bout, 0);
8089    }
8090
8091    // ── Hex chunk formatting ────────────────────────────────────────────
8092
8093    /// Verify that the Vec<u8> + write!() hex formatting used in
8094    /// handle_data_frame produces output identical to format!("{:x}").
8095    #[test]
8096    fn test_hex_chunk_length_formatting() {
8097        use std::io::Write as _;
8098
8099        let cases: &[(usize, &[u8])] = &[
8100            (1, b"1"),
8101            (15, b"f"),
8102            (16, b"10"),
8103            (255, b"ff"),
8104            (256, b"100"),
8105            (4096, b"1000"),
8106            (65535, b"ffff"),
8107            (65536, b"10000"),
8108        ];
8109
8110        for &(payload_len, expected) in cases {
8111            let mut buf = Vec::with_capacity(16);
8112            let _ = write!(buf, "{payload_len:x}");
8113            assert_eq!(
8114                buf, expected,
8115                "hex formatting mismatch for payload_len={payload_len}"
8116            );
8117        }
8118
8119        // usize::MAX tested separately to avoid temporary lifetime issue
8120        let max_expected = format!("{:x}", usize::MAX);
8121        let mut buf = Vec::with_capacity(16);
8122        let _ = write!(buf, "{:x}", usize::MAX);
8123        assert_eq!(buf, max_expected.as_bytes());
8124    }
8125
8126    // ── Stream-ID allocation / exhaustion ──────────────────────────────────
8127
8128    /// A fresh client connection starts with `last_stream_id == 0`. The first
8129    /// call MUST issue stream `1` (odd, RFC 9113 §5.1.1) and advance the
8130    /// watermark to `2`.
8131    #[test]
8132    fn test_next_stream_id_client_first_allocation() {
8133        let (issued, next) = next_stream_id(0, true).expect("fresh client must allocate");
8134        assert_eq!(issued, 1);
8135        assert_eq!(next, 2);
8136    }
8137
8138    /// Client allocation yields strictly increasing odd identifiers
8139    /// (1, 3, 5, ...) as required by RFC 9113 §5.1.1.
8140    #[test]
8141    fn test_next_stream_id_client_sequence_is_odd_and_monotonic() {
8142        let mut last = 0u32;
8143        let mut issued_ids = Vec::with_capacity(8);
8144        for _ in 0..8 {
8145            let (id, next) = next_stream_id(last, true).expect("unexhausted");
8146            assert_eq!(id & 1, 1, "client stream ids must be odd (RFC 9113 §5.1.1)");
8147            assert!(issued_ids.last().is_none_or(|prev: &u32| id > *prev));
8148            issued_ids.push(id);
8149            last = next;
8150        }
8151        assert_eq!(issued_ids, vec![1, 3, 5, 7, 9, 11, 13, 15]);
8152    }
8153
8154    /// Server-side allocation yields even identifiers. The helper
8155    /// convention is `watermark - 2` for server, `watermark - 1` for client,
8156    /// so both sides share the same monotonically-increasing even watermark.
8157    /// Sōzu never server-pushes, but the helper must be symmetric so push
8158    /// could be enabled without a regression.
8159    #[test]
8160    fn test_next_stream_id_server_is_even() {
8161        // `last = 2` means the most recent allocation advanced the watermark
8162        // to 2; server then issues `2 - 2 = 0`. This is an artefact of the
8163        // shared watermark and only matters in tests — server never uses it.
8164        let (issued, next) = next_stream_id(2, false).expect("server allocation");
8165        assert_eq!(issued & 1, 0, "server stream ids must be even");
8166        assert_eq!(next, 4);
8167        assert_eq!(issued, 2);
8168
8169        let (issued, next) = next_stream_id(next, false).expect("second slot");
8170        assert_eq!(issued, 4);
8171        assert_eq!(issued & 1, 0);
8172        assert_eq!(next, 6);
8173    }
8174
8175    /// The last client-issuable odd stream ID is `STREAM_ID_MAX = 0x7FFF_FFFF`.
8176    /// To issue it the watermark must advance to `STREAM_ID_MAX + 1 = 2³¹`;
8177    /// the caller therefore supplies `last = STREAM_ID_MAX - 1 = 0x7FFF_FFFE`.
8178    /// That call MUST succeed and return the max ID; the post-call watermark
8179    /// sits at `2³¹`, which is the sentinel that makes the next call fail.
8180    #[test]
8181    fn test_next_stream_id_client_final_slot_allocates() {
8182        let last = STREAM_ID_MAX - 1;
8183        let (issued, next) = next_stream_id(last, true).expect("final slot still allocates");
8184        assert_eq!(issued, STREAM_ID_MAX);
8185        assert_eq!(next, STREAM_ID_MAX + 1);
8186        // And the very next call MUST refuse rather than wrap.
8187        assert!(next_stream_id(next, true).is_none());
8188    }
8189
8190    /// Exhaustion case: once the client has issued stream ID `STREAM_ID_MAX`,
8191    /// the watermark sits at `STREAM_ID_MAX + 1`. The next request MUST return
8192    /// `None` — without this guard the helper would issue `STREAM_ID_MAX + 2`
8193    /// (wrapped down to an even id), which would (a) use the reserved
8194    /// high bit and (b) violate the odd-parity invariant for client streams.
8195    #[test]
8196    fn test_next_stream_id_client_exhausted_returns_none() {
8197        let last = STREAM_ID_MAX + 1;
8198        assert!(next_stream_id(last, true).is_none());
8199    }
8200
8201    /// Exhaustion via `checked_add` saturation: defence in depth in case a
8202    /// caller jumps `last_stream_id` close to `u32::MAX`. The helper must
8203    /// not panic nor overflow — it must return `None`.
8204    #[test]
8205    fn test_next_stream_id_saturates_near_u32_max() {
8206        assert!(next_stream_id(u32::MAX, true).is_none());
8207        assert!(next_stream_id(u32::MAX - 1, true).is_none());
8208    }
8209
8210    /// Server-side exhaustion: same guard, even-parity identifier space.
8211    #[test]
8212    fn test_next_stream_id_server_exhausted_returns_none() {
8213        let last = STREAM_ID_MAX + 1;
8214        assert!(next_stream_id(last, false).is_none());
8215    }
8216
8217    /// Regression guard: the helper must never issue a stream ID that
8218    /// exceeds `STREAM_ID_MAX` for either side, no matter where the
8219    /// watermark sits. This walks every value in a neighbourhood of the
8220    /// boundary to rule out off-by-one errors.
8221    #[test]
8222    fn test_next_stream_id_never_exceeds_stream_id_max() {
8223        for last in (STREAM_ID_MAX - 4)..=(STREAM_ID_MAX + 4) {
8224            for is_client in [true, false] {
8225                if let Some((issued, next)) = next_stream_id(last, is_client) {
8226                    assert!(
8227                        issued <= STREAM_ID_MAX,
8228                        "issued id {issued} exceeds STREAM_ID_MAX (last={last}, is_client={is_client})"
8229                    );
8230                    // `next` is the post-allocation watermark and may sit at
8231                    // STREAM_ID_MAX + 1 — the very next call must then return None.
8232                    if next > STREAM_ID_MAX {
8233                        assert!(
8234                            next_stream_id(next, is_client).is_none(),
8235                            "second call after final slot must report exhaustion"
8236                        );
8237                    }
8238                }
8239            }
8240        }
8241    }
8242
8243    /// The helper's `is_client` flag must cleanly split the ID space so that
8244    /// a client and a server peered on the same connection cannot collide.
8245    /// Given the same `last_stream_id`, the two parities must differ by 1.
8246    #[test]
8247    fn test_next_stream_id_client_server_parities_disjoint() {
8248        for last in [0u32, 2, 4, 10, 100, 1_000_000, STREAM_ID_MAX - 3] {
8249            let (client_id, _) = next_stream_id(last, true).unwrap();
8250            let (server_id, _) = next_stream_id(last, false).unwrap();
8251            assert_eq!(client_id & 1, 1);
8252            assert_eq!(server_id & 1, 0);
8253            assert_eq!(client_id.abs_diff(server_id), 1);
8254        }
8255    }
8256
8257    // ── LIFECYCLE §9 invariant 16: any_stream_id_matches ─────────────────
8258    //
8259    // Covers the iteration dispatch used by `any_stream_has_pending_back`.
8260    // Testing the probe directly against a synthetic closure keeps the
8261    // tests independent of the full `Stream` fixture (which requires a
8262    // `Pool` and a fully-built `HttpContext`).
8263
8264    #[test]
8265    fn test_any_stream_id_matches_empty_map_is_false() {
8266        let streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8267        assert!(!any_stream_id_matches(&streams, |_| true));
8268    }
8269
8270    #[test]
8271    fn test_any_stream_id_matches_all_probe_false_is_false() {
8272        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8273        streams.insert(1, 0);
8274        streams.insert(3, 1);
8275        streams.insert(5, 2);
8276        assert!(!any_stream_id_matches(&streams, |_| false));
8277    }
8278
8279    #[test]
8280    fn test_any_stream_id_matches_any_probe_true_is_true() {
8281        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8282        streams.insert(1, 0);
8283        streams.insert(3, 1);
8284        streams.insert(5, 2);
8285        // Probe is true only for GlobalStreamId == 1 (i.e. StreamId 3).
8286        assert!(any_stream_id_matches(&streams, |gid| gid == 1));
8287    }
8288
8289    #[test]
8290    fn test_any_stream_id_matches_single_entry() {
8291        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8292        streams.insert(42, 7);
8293        assert!(any_stream_id_matches(&streams, |gid| gid == 7));
8294        assert!(!any_stream_id_matches(&streams, |gid| gid == 8));
8295    }
8296
8297    #[test]
8298    fn test_any_stream_id_matches_short_circuits() {
8299        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8300        streams.insert(1, 0);
8301        streams.insert(3, 1);
8302        streams.insert(5, 2);
8303        streams.insert(7, 3);
8304        let mut calls = 0usize;
8305        let result = any_stream_id_matches(&streams, |_| {
8306            calls += 1;
8307            true
8308        });
8309        assert!(result);
8310        // `Iterator::any` short-circuits on the first `true` — so the probe
8311        // must fire at most once in this construction.
8312        assert_eq!(calls, 1);
8313    }
8314
8315    // ── cumulative-stall budget decision (fc_stall_budget_decision) ──
8316
8317    #[test]
8318    fn test_fc_stall_budget_open_window_always_clears() {
8319        // A genuinely open send window is a real un-stall, regardless of prior
8320        // accumulated progress or this pass's drain.
8321        assert_eq!(
8322            fc_stall_budget_decision(false, 0, None),
8323            FcStallAction::Clear
8324        );
8325        assert_eq!(
8326            fc_stall_budget_decision(false, 1, Some(5)),
8327            FcStallAction::Clear
8328        );
8329        assert_eq!(
8330            fc_stall_budget_decision(false, i32::MAX, Some(FC_STALL_CLEAR_FLOOR)),
8331            FcStallAction::Clear
8332        );
8333    }
8334
8335    #[test]
8336    fn test_fc_stall_budget_blocked_arms_and_accumulates() {
8337        // First blocked pass arms with this pass's drain.
8338        assert_eq!(
8339            fc_stall_budget_decision(true, 1, None),
8340            FcStallAction::Arm { progress: 1 }
8341        );
8342        // A blocked pass with no drain keeps the accumulator unchanged, so the
8343        // deadline keeps aging (a window-0 stall makes consumed == 0).
8344        assert_eq!(
8345            fc_stall_budget_decision(true, 0, Some(42)),
8346            FcStallAction::Arm { progress: 42 }
8347        );
8348        // Negative `consumed` is clamped to 0 (defensive; converter.window only
8349        // shrinks, so consumed is >= 0 in practice).
8350        assert_eq!(
8351            fc_stall_budget_decision(true, -10, Some(7)),
8352            FcStallAction::Arm { progress: 7 }
8353        );
8354    }
8355
8356    #[test]
8357    fn test_fc_stall_budget_floor_clears() {
8358        // Reaching the floor in a single pass (a full DATA frame of real
8359        // delivery) clears the deadline.
8360        assert_eq!(
8361            fc_stall_budget_decision(true, FC_STALL_CLEAR_FLOOR as i32, None),
8362            FcStallAction::Clear
8363        );
8364        // Exactly one byte below the floor still arms.
8365        assert_eq!(
8366            fc_stall_budget_decision(true, (FC_STALL_CLEAR_FLOOR - 1) as i32, None),
8367            FcStallAction::Arm {
8368                progress: FC_STALL_CLEAR_FLOOR - 1
8369            }
8370        );
8371        // Prior progress plus this pass crossing the floor clears.
8372        assert_eq!(
8373            fc_stall_budget_decision(true, 1, Some(FC_STALL_CLEAR_FLOOR - 1)),
8374            FcStallAction::Clear
8375        );
8376    }
8377
8378    #[test]
8379    fn test_fc_stall_budget_wu_drip_ages_until_floor() {
8380        // The WINDOW_UPDATE(+1) closure: a 1-byte-per-pass drip must keep the
8381        // deadline armed (aging) for the whole run up to the floor and only
8382        // clear on the pass that reaches it — so a drip granting < floor bytes
8383        // per idle period is reaped, never kept alive. This is the unit-level
8384        // proof that the budget closes the WINDOW_UPDATE-drip vector.
8385        let mut progress: Option<usize> = None;
8386        for pass in 1..FC_STALL_CLEAR_FLOOR {
8387            match fc_stall_budget_decision(true, 1, progress) {
8388                FcStallAction::Arm { progress: p } => {
8389                    assert_eq!(p, pass, "drip accumulator off at pass {pass}");
8390                    progress = Some(p);
8391                }
8392                FcStallAction::Clear => panic!("drip cleared the deadline early at pass {pass}"),
8393            }
8394        }
8395        // The pass that reaches the floor finally clears.
8396        assert_eq!(
8397            fc_stall_budget_decision(true, 1, progress),
8398            FcStallAction::Clear
8399        );
8400    }
8401
8402    // ── flow-control-stall reaper union (collect_timed_out_streams) ──
8403
8404    #[test]
8405    fn test_collect_timed_out_streams_reaps_fc_stall_despite_fresh_liveness() {
8406        // A window-stalled stream MUST be reaped on the flow-control-stall
8407        // deadline even if its bidirectional-liveness timer is fresh — an
8408        // inbound 1-byte DATA drip keeps `last_activity` warm but never touches
8409        // `fc_stalled`. Without the `fc_stalled` guard this stream is never
8410        // reaped (the pre-fix window-stall hold).
8411        let now = Instant::now();
8412        let deadline = std::time::Duration::from_secs(2);
8413        let mut live = HashMap::new();
8414        live.insert(7u32, 0usize);
8415        let rst_sent = HashSet::new();
8416        let mut last_activity = HashMap::new();
8417        last_activity.insert(7u32, now); // fresh: just received an inbound DATA drip
8418        let mut fc_stalled = HashMap::new();
8419        fc_stalled.insert(7u32, now - std::time::Duration::from_secs(5));
8420        let out =
8421            collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline);
8422        assert_eq!(out, vec![(7u32, "H2::WindowStall")]);
8423    }
8424
8425    #[test]
8426    fn test_collect_timed_out_streams_idle_dedup_and_filters() {
8427        let now = Instant::now();
8428        let deadline = std::time::Duration::from_secs(2);
8429        let old = now - std::time::Duration::from_secs(5);
8430        let mut live = HashMap::new();
8431        for sid in [1u32, 3, 5, 9] {
8432            live.insert(sid, 0usize);
8433        }
8434        let mut rst_sent = HashSet::new();
8435        rst_sent.insert(9u32); // already resetting -> excluded
8436        let mut last_activity = HashMap::new();
8437        last_activity.insert(1u32, old); // idle past deadline
8438        last_activity.insert(3u32, now); // fresh -> survives
8439        last_activity.insert(5u32, old); // idle AND fc-stalled -> dedup to one entry
8440        last_activity.insert(9u32, old); // idle but rst_sent -> excluded
8441        last_activity.insert(11u32, old); // not a live stream -> excluded
8442        let mut fc_stalled = HashMap::new();
8443        fc_stalled.insert(5u32, old);
8444        let mut out =
8445            collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline);
8446        out.sort();
8447        assert_eq!(
8448            out,
8449            vec![(1u32, "H2::IdleTimeout"), (5u32, "H2::IdleTimeout")]
8450        );
8451    }
8452
8453    #[test]
8454    fn test_collect_timed_out_streams_empty_when_all_fresh() {
8455        let now = Instant::now();
8456        let deadline = std::time::Duration::from_secs(2);
8457        let mut live = HashMap::new();
8458        live.insert(1u32, 0usize);
8459        let rst_sent = HashSet::new();
8460        let mut last_activity = HashMap::new();
8461        last_activity.insert(1u32, now);
8462        let mut fc_stalled = HashMap::new();
8463        fc_stalled.insert(1u32, now);
8464        assert!(
8465            collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline)
8466                .is_empty()
8467        );
8468    }
8469
8470    // ── LIFECYCLE §9 invariant 16: any_stream_has_pending_back ───────────
8471
8472    /// Build a minimal `Stream` for invariant-16 probing. Uses the pool
8473    /// plumbing so `back.blocks` / `back.out` exist; every other field is
8474    /// default-valued because the predicate only reads the back buffer.
8475    fn make_stream_for_invariant_16(pool: &Rc<RefCell<Pool>>, session_ulid: Ulid) -> Stream {
8476        let http_ctx = HttpContext {
8477            keep_alive_backend: true,
8478            keep_alive_frontend: true,
8479            sticky_session_found: None,
8480            method: None,
8481            authority: None,
8482            path: None,
8483            status: None,
8484            reason: None,
8485            user_agent: None,
8486            x_request_id: None,
8487            xff_chain: None,
8488            #[cfg(feature = "opentelemetry")]
8489            otel: None,
8490            closing: false,
8491            session_id: session_ulid,
8492            id: Ulid::generate(),
8493            backend_id: None,
8494            cluster_id: None,
8495            protocol: Protocol::HTTPS,
8496            public_address: "127.0.0.1:0".parse().unwrap(),
8497            session_address: None,
8498            sticky_name: String::new(),
8499            sticky_session: None,
8500            backend_address: None,
8501            tls_server_name: None,
8502            tls_cert_names: None,
8503            strict_sni_binding: false,
8504            elide_x_real_ip: false,
8505            send_x_real_ip: false,
8506            tls_version: None,
8507            tls_cipher: None,
8508            tls_alpn: None,
8509            sozu_id_header: String::from("Sozu-Id"),
8510            redirect_location: None,
8511            www_authenticate: None,
8512            original_authority: None,
8513            headers_response: Vec::new(),
8514            retry_after_seconds: None,
8515            frontend_redirect_template: None,
8516            redirect_status: None,
8517            access_log_message: None,
8518        };
8519        Stream::new(Rc::downgrade(pool), http_ctx, 65_535)
8520            .expect("pool should have capacity for two buffers")
8521    }
8522
8523    fn make_pool_for_invariant_16() -> Rc<RefCell<Pool>> {
8524        // Two buffer slots per stream (front + back), ten stream slots is
8525        // plenty for the tests below.
8526        Rc::new(RefCell::new(Pool::with_capacity(4, 20, 16_384)))
8527    }
8528
8529    #[test]
8530    fn test_any_stream_has_pending_back_empty_map_is_false() {
8531        let pool = make_pool_for_invariant_16();
8532        let ulid = Ulid::generate();
8533        let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8534        let context_streams = vec![make_stream_for_invariant_16(&pool, ulid)];
8535        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
8536    }
8537
8538    #[test]
8539    fn test_any_stream_has_pending_back_all_drained_is_false() {
8540        let pool = make_pool_for_invariant_16();
8541        let ulid = Ulid::generate();
8542        let context_streams = vec![
8543            make_stream_for_invariant_16(&pool, ulid),
8544            make_stream_for_invariant_16(&pool, ulid),
8545        ];
8546        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8547        streams_map.insert(1, 0);
8548        streams_map.insert(3, 1);
8549        // Both freshly-built streams have empty back.out and back.blocks
8550        // (Kawa::new starts with empty deques).
8551        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
8552    }
8553
8554    #[test]
8555    fn test_any_stream_has_pending_back_unknown_gid_is_false() {
8556        // LIFECYCLE invariant 16 defence-in-depth: an unknown
8557        // `GlobalStreamId` during a stream-removal race must not panic;
8558        // `.get()` must short-circuit to `false`.
8559        let pool = make_pool_for_invariant_16();
8560        let ulid = Ulid::generate();
8561        let context_streams = vec![make_stream_for_invariant_16(&pool, ulid)];
8562        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8563        // GlobalStreamId 42 is out of range for the 1-element slice above.
8564        streams_map.insert(7, 42);
8565        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
8566    }
8567
8568    #[test]
8569    fn test_any_stream_has_pending_back_with_pending_blocks_is_true() {
8570        let pool = make_pool_for_invariant_16();
8571        let ulid = Ulid::generate();
8572        let mut stream = make_stream_for_invariant_16(&pool, ulid);
8573        // Push one dummy block — any Block variant is fine; the predicate
8574        // only checks `blocks.is_empty()`.
8575        stream.back.blocks.push_back(kawa::Block::StatusLine);
8576        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8577        streams_map.insert(1, 0);
8578        assert!(any_stream_has_pending_back(&streams_map, &[stream]));
8579    }
8580
8581    #[test]
8582    fn test_any_stream_has_pending_back_with_pending_out_is_true() {
8583        let pool = make_pool_for_invariant_16();
8584        let ulid = Ulid::generate();
8585        let mut stream = make_stream_for_invariant_16(&pool, ulid);
8586        // Non-empty out buffer with no blocks.
8587        stream
8588            .back
8589            .out
8590            .push_back(kawa::OutBlock::Store(kawa::Store::Static(b"partial frame")));
8591        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
8592        streams_map.insert(1, 0);
8593        assert!(any_stream_has_pending_back(&streams_map, &[stream]));
8594    }
8595
8596    // ── ready_incremental_by_urgency mid-pass consistency ────────────────
8597    //
8598    // The full RED is in e2e and currently #[ignore]'d (timing-sensitive).
8599    // The scalar logic below pins the saturating_sub + bucket-scoped
8600    // decrement contract the scheduler at h2.rs:2412-2414 + h2.rs:2481
8601    // relies on: a same-urgency transition-to-ineligible MUST drop the
    // per-bucket count by exactly 1 and never underflow the counter.
8603
8604    fn make_bucket(counts: &[(u8, usize)]) -> HashMap<u8, usize> {
8605        counts.iter().copied().collect()
8606    }
8607
8608    #[test]
8609    fn ready_incremental_bucket_decrement_reduces_same_urgency_only() {
8610        let mut map = make_bucket(&[(1, 3), (3, 2)]);
8611        let urgency: u8 = 1;
8612        let is_incremental = true;
8613        // Simulate a stream in urgency=1 going ineligible mid-pass.
8614        if is_incremental {
8615            if let Some(c) = map.get_mut(&urgency) {
8616                *c = c.saturating_sub(1);
8617            }
8618        }
8619        assert_eq!(map.get(&1), Some(&2), "urgency-1 bucket must drop to 2");
8620        assert_eq!(map.get(&3), Some(&2), "urgency-3 bucket untouched");
8621    }
8622
8623    #[test]
8624    fn ready_incremental_bucket_decrement_saturates_at_zero() {
8625        let mut map = make_bucket(&[(0, 0)]);
8626        let urgency: u8 = 0;
8627        if let Some(c) = map.get_mut(&urgency) {
8628            *c = c.saturating_sub(1);
8629        }
8630        assert_eq!(map.get(&0), Some(&0), "saturating_sub must not underflow");
8631    }
8632
8633    #[test]
8634    fn ready_incremental_bucket_decrement_skipped_for_non_incremental() {
8635        let mut map = make_bucket(&[(1, 3)]);
8636        let is_incremental = false;
8637        if is_incremental {
8638            if let Some(c) = map.get_mut(&1) {
8639                *c = c.saturating_sub(1);
8640            }
8641        }
8642        assert_eq!(
8643            map.get(&1),
8644            Some(&3),
8645            "non-incremental transitions must not touch the bucket"
8646        );
8647    }
8648
8649    // ── enqueue_rst: queue / dedupe / counter / arm invariants ───────────
8650    //
8651    // `enqueue_rst_into` is the free-function primitive shared by all three
8652    // RST push sites (DATA-on-closed, refuse_stream_and_discard,
8653    // reset_stream). The method delegates; the invariants live here.
8654
8655    #[test]
8656    fn test_enqueue_rst_into_populates_queue_and_dedupe() {
8657        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
8658        let mut total: usize = 0;
8659        let mut sent: HashSet<StreamId> = HashSet::new();
8660        let mut readiness = Readiness::new();
8661
8662        let first = enqueue_rst_into(
8663            &mut pending,
8664            &mut total,
8665            &mut sent,
8666            &mut readiness,
8667            5,
8668            H2Error::ProtocolError,
8669        );
8670        assert!(first, "first call must report freshly_queued = true");
8671        // Second call for the same stream must be a no-op AND return
8672        // false so accounting in `Self::enqueue_rst` skips this case.
8673        let second = enqueue_rst_into(
8674            &mut pending,
8675            &mut total,
8676            &mut sent,
8677            &mut readiness,
8678            5,
8679            H2Error::InternalError,
8680        );
8681        assert!(
8682            !second,
8683            "second call for same stream must return freshly_queued = false"
8684        );
8685
8686        assert_eq!(pending.len(), 1, "dedupe must collapse to a single entry");
8687        assert_eq!(
8688            pending[0],
8689            (5, H2Error::ProtocolError),
8690            "the first error wins — second push is ignored"
8691        );
8692        assert_eq!(total, 1, "queued-cap counter must bump exactly once");
8693        assert!(sent.contains(&5), "rst_sent must record the id");
8694    }
8695
8696    #[test]
8697    fn test_enqueue_rst_into_bumps_total_for_distinct_ids() {
8698        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
8699        let mut total: usize = 0;
8700        let mut sent: HashSet<StreamId> = HashSet::new();
8701        let mut readiness = Readiness::new();
8702
8703        for sid in [1u32, 3, 5, 7] {
8704            enqueue_rst_into(
8705                &mut pending,
8706                &mut total,
8707                &mut sent,
8708                &mut readiness,
8709                sid,
8710                H2Error::ProtocolError,
8711            );
8712        }
8713
8714        assert_eq!(pending.len(), 4);
8715        assert_eq!(total, 4);
8716        assert_eq!(sent.len(), 4);
8717    }
8718
8719    #[test]
8720    fn test_enqueue_rst_into_arms_writable_in_invariant_15_form() {
8721        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
8722        let mut total: usize = 0;
8723        let mut sent: HashSet<StreamId> = HashSet::new();
8724        let mut readiness = Readiness::new();
8725
8726        // Precondition: no WRITABLE bits set.
8727        assert!(!readiness.interest.is_writable());
8728        assert!(!readiness.event.is_writable());
8729
8730        enqueue_rst_into(
8731            &mut pending,
8732            &mut total,
8733            &mut sent,
8734            &mut readiness,
8735            9,
8736            H2Error::FlowControlError,
8737        );
8738
8739        // Postcondition: invariant-15 — both `interest` and `event` WRITABLE
8740        // are raised so the next tick runs `writable()` under edge-triggered
8741        // epoll.
8742        assert!(
8743            readiness.interest.is_writable(),
8744            "arm_writable must raise the interest bit"
8745        );
8746        assert!(
8747            readiness.event.is_writable(),
8748            "arm_writable must raise the event bit (edge-triggered epoll)"
8749        );
8750    }
8751
8752    #[test]
8753    fn test_enqueue_rst_into_dedupe_does_not_rearm_writable() {
8754        // Dedupe is a pure short-circuit: if the stream id is already in
8755        // `rst_sent`, we do not touch the readiness. This matters because
8756        // a re-entrant reset_stream call during a cascading error path
8757        // would otherwise re-raise WRITABLE unnecessarily — harmless but
8758        // noisy in metrics.
8759        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
8760        let mut total: usize = 0;
8761        let mut sent: HashSet<StreamId> = HashSet::new();
8762        sent.insert(11);
8763        let mut readiness = Readiness::new();
8764
8765        enqueue_rst_into(
8766            &mut pending,
8767            &mut total,
8768            &mut sent,
8769            &mut readiness,
8770            11,
8771            H2Error::ProtocolError,
8772        );
8773
8774        assert!(
8775            pending.is_empty(),
8776            "already-sent ids must not queue a second frame"
8777        );
8778        assert_eq!(total, 0);
8779        assert!(!readiness.interest.is_writable());
8780        assert!(!readiness.event.is_writable());
8781    }
8782
8783    // ── forcefully_terminate_answer arms WRITABLE for ET epoll ───────────
8784    //
8785    // Gap A in the h2spec diagnosis: the pre-fix code set `interest` but
8786    // never raised `event`, so `filter_interest() = event & interest` was
8787    // zero and `writable()` was never scheduled. This test pins the fix.
8788
8789    #[test]
8790    fn test_forcefully_terminate_answer_arms_event_and_interest() {
8791        let pool = make_pool_for_invariant_16();
8792        let ulid = Ulid::generate();
8793        let mut stream = make_stream_for_invariant_16(&pool, ulid);
8794        let mut readiness = Readiness::new();
8795
8796        assert!(!readiness.interest.is_writable());
8797        assert!(!readiness.event.is_writable());
8798
8799        forcefully_terminate_answer(&mut stream, &mut readiness, H2Error::ProtocolError);
8800
8801        assert!(
8802            readiness.interest.is_writable(),
8803            "forcefully_terminate_answer must set the WRITABLE interest bit"
8804        );
8805        assert!(
8806            readiness.event.is_writable(),
8807            "forcefully_terminate_answer must set the WRITABLE event bit — \
8808             without this, filter_interest() = 0 under edge-triggered epoll \
8809             and writable() is never scheduled (h2spec Gap A)"
8810        );
8811    }
8812}
sozu_lib/protocol/mux/h2.rs

sozu_lib/protocol/mux/
h2.rs