sozu_lib/protocol/mux/
h2.rs

1//! H2 mux connection wrapper (RFC 9113).
2//!
3//! Owns wire-side connection state: HPACK encoder/decoder, peer settings,
4//! flow window, GOAWAY/RST attribution, and the [`H2FloodDetector`] backing
5//! the CVE-2023-44487 / CVE-2024-27316 / CVE-2025-8671 mitigations. Stream
6//! storage lives in the sibling `Context<L>` (`mux/mod.rs`); this module is
7//! the canonical home for the edge-trigger discipline — paths that queue
8//! bytes for a later event-loop pass must arm writable / signal pending
9//! write (cf. `arm_writable()` at the deferred-control-frame sites and
10//! `lib/src/lib.rs:1006`-`1010`).
11
12use std::{
13    cmp::min,
14    collections::{HashMap, HashSet},
15    io::{IoSlice, Write as _},
16    time::{Duration, Instant},
17};
18
/// Compile-time guard for the `payload_len as usize` casts in the H2 parser:
/// they are only lossless when `usize` is at least 32 bits wide. Building for
/// a narrower target (e.g. a 16-bit embedded platform) fails here instead of
/// silently truncating at runtime.
const _: () = assert!(
    usize::BITS >= 32,
    "sozu requires at least 32-bit pointers"
);
26
27use rusty_ulid::Ulid;
28use sozu_command::{logging::ansi_palette, ready::Ready};
29
30use crate::metrics::names;
31use crate::{
32    L7ListenerHandler, ListenerHandler, Protocol, Readiness, SessionMetrics,
33    protocol::mux::{
34        BackendStatus, Context, DebugEvent, DebugHistory, Endpoint, GenericHttpStream,
35        GlobalStreamId, MuxResult, Position, Stream, StreamId, StreamState, converter,
36        forcefully_terminate_answer,
37        parser::{self, Frame, FrameHeader, FrameType, H2Error, Headers, WindowUpdate},
38        pkawa, remove_backend_stream, serializer, set_default_answer,
39        shared::{EndStreamAction, drain_tls_close_notify, end_stream_decision},
40        update_readiness_after_read, update_readiness_after_write,
41    },
42    socket::{SocketHandler, SocketResult, stats::socket_rtt},
43    timer::TimeoutContainer,
44};
45
/// Protocol label + session descriptor used as a prefix on every
/// [`ConnectionH2`] log line. Matches the RUSTLS log-context convention:
/// `MUX-H2\tSession(...)\t >>>`. When colored output is enabled (via
/// [`ansi_palette`]) the label is wrapped in bold bright-white ANSI (uniform
/// across every protocol) and the session detail is rendered in light grey.
///
/// The leading bracket is `[<session ulid> - - -]`: the three dashes stand in
/// for the request, cluster and backend slots that [`log_context_stream!`]
/// fills in when a stream is in scope.
///
/// Fields included in the session block (chosen to surface the most common
/// H2 troubleshooting axes — flow stall, leaked stream, draining state,
/// peer-side gap, reset-flood exposure):
/// - `peer` — peer address (or `None` if the socket is gone)
/// - `position` — `Server` / `Client(...)` orientation
/// - `state` — current [`H2State`]
/// - `streams` — number of in-flight streams on this connection
/// - `last_peer_id` — `highest_peer_stream_id` (gap to the peer's view)
/// - `window` — connection-level send window (RFC 9113 §6.9)
/// - `draining` — set after the first GOAWAY of a graceful shutdown
/// - `total_rst_streams_emitted_lifetime` — MadeYouReset counter (CVE-2025-8671)
/// - `total_rst_received_lifetime` — Rapid Reset counter (CVE-2023-44487)
/// - `readiness` — connection-level mio readiness snapshot
///
/// Computed lazily on each callsite — the helper only materialises when the
/// log level is enabled, so uncolored hot paths keep a single thread-local
/// read (the colored check) and one `format!` allocation.
macro_rules! log_context {
    ($self:expr) => {{
        // Palette slots as used by the format string below: `open` wraps the
        // protocol label, `grey` wraps the `Session` word, `gray` wraps each
        // field name, `white` wraps each field value, and `reset` closes
        // every colored span.
        let (open, reset, grey, gray, white) = ansi_palette();
        format!(
            "[{ulid} - - -]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = open,
            reset = reset,
            grey = grey,
            gray = gray,
            white = white,
            ulid = $self.session_ulid,
            // A failed `peer_addr()` lookup is rendered as `None` rather than
            // aborting the log line.
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
        )
    }};
}
93
/// Per-stream variant of [`log_context!`] used when a [`Stream`]'s
/// [`HttpContext`](crate::protocol::kawa_h1::editor::HttpContext) is in
/// scope. Populates the `request_id`, `cluster_id` and `backend_id` slots of
/// the bracket so the log line can be filtered by the specific H2 stream it
/// belongs to.
///
/// `$http_context` must expose `id`, `cluster_id` and `backend_id`; a missing
/// cluster or backend id is rendered as `-`. The session block that follows
/// the bracket is identical to the one produced by [`log_context!`].
// NOTE(review): the `allow` presumably covers build configurations in which
// no call site of this macro is compiled — confirm before removing it.
#[allow(unused_macros)]
macro_rules! log_context_stream {
    ($self:expr, $http_context:expr) => {{
        // Same palette slot usage as `log_context!`: `open` = label, `grey` =
        // `Session` word, `gray` = field names, `white` = field values.
        let (open, reset, grey, gray, white) = ansi_palette();
        format!(
            "[{ulid} {req} {cluster} {backend}]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = open,
            reset = reset,
            grey = grey,
            gray = gray,
            white = white,
            ulid = $self.session_ulid,
            req = $http_context.id,
            // Unset cluster / backend ids fall back to the `-` placeholder.
            cluster = $http_context.cluster_id.as_deref().unwrap_or("-"),
            backend = $http_context.backend_id.as_deref().unwrap_or("-"),
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
        )
    }};
}
127
/// Module-level prefix without session context, for logs emitted from
/// free functions, `H2ConnectionConfig` validation and other sites where no
/// `ConnectionH2` is in scope. Keeps the `MUX-H2` label consistent with
/// connection logs and honours the colored flag.
///
/// Expands to a `String` of the form `MUX-H2\t >>>`, with the label wrapped
/// in the first two [`ansi_palette`] slots.
macro_rules! log_module_context {
    () => {{
        // Only the label open/reset sequences are needed here; the three
        // remaining palette slots are ignored.
        let (open, reset, _, _, _) = ansi_palette();
        format!("{open}MUX-H2{reset}\t >>>", open = open, reset = reset)
    }};
}
138
/// Run the flood check and early-return from the **enclosing function** when
/// a threshold has tripped.
///
/// Wraps the
/// `if let Some(violation) = self.flood_detector.check_flood() { return self.handle_flood_violation(violation); }`
/// pattern as a single statement. Pure dispatch — the actual flood
/// thresholds and counters live inside `H2FloodDetector::check_flood` and
/// `ConnectionH2::handle_flood_violation`, which the macro does not touch.
/// Use this at every per-frame counter bump site so the wrapper stays
/// uniform and a future grep for "flood-check forgot to return" finds zero.
///
/// Because the expansion contains a bare `return`, the macro can only be
/// used inside a function whose return type is the one produced by
/// `handle_flood_violation`.
macro_rules! check_flood_or_return {
    ($self:expr) => {
        // `None` means no counter is over its threshold: fall through and
        // let the caller continue processing the frame.
        if let Some(violation) = $self.flood_detector.check_flood() {
            return $self.handle_flood_violation(violation);
        }
    };
}
152
/// Outcome of a single-stream write flush in `write_streams`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlushOutcome {
    /// All queued bytes were drained to the socket.
    Drained,
    /// The socket blocked before the queue was drained. The caller must
    /// arrange to resume (set `expect_write` or return from `write_streams`).
    Stalled,
}
162
// ── RFC 9113 §6.5.2 Settings Defaults ───────────────────────────────────────

/// Initial value of SETTINGS_HEADER_TABLE_SIZE (RFC 9113 §6.5.2): 4096 octets.
const DEFAULT_HEADER_TABLE_SIZE: u32 = 4096;
/// Local default for SETTINGS_MAX_CONCURRENT_STREAMS. RFC 9113 §6.5.2 defines
/// no initial limit for this setting; 100 is the minimum value the RFC
/// recommends advertising, not a protocol default.
const DEFAULT_MAX_CONCURRENT_STREAMS: u32 = 100;
/// Initial value of SETTINGS_INITIAL_WINDOW_SIZE (RFC 9113 §6.5.2): 2^16 - 1.
pub(super) const DEFAULT_INITIAL_WINDOW_SIZE: u32 = (1 << 16) - 1; // 65535
/// Initial value of SETTINGS_MAX_FRAME_SIZE (RFC 9113 §6.5.2): 2^14 octets.
const DEFAULT_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384

// RFC 9113 §6.5.2: SETTINGS_MAX_FRAME_SIZE valid range [2^14, 2^24)
/// Smallest SETTINGS_MAX_FRAME_SIZE a peer may advertise (inclusive).
const MIN_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
/// One past the largest SETTINGS_MAX_FRAME_SIZE a peer may advertise: the
/// largest legal value is 2^24 - 1, so comparisons against this constant must
/// be strict (`<`).
const MAX_MAX_FRAME_SIZE: u32 = 1 << 24; // 16777216 (exclusive upper bound)

// RFC 9113 §6.9: maximum flow control window size (2^31 - 1)
/// Largest value a flow-control window may reach (inclusive).
const FLOW_CONTROL_MAX_WINDOW: u32 = (1 << 31) - 1;
// RFC 9113 §5.1.1: stream identifiers are 31-bit unsigned integers (2^31 - 1).
/// Largest valid stream identifier (inclusive).
const STREAM_ID_MAX: u32 = 0x7FFF_FFFF;
178
179/// Allocate the next locally-initiated stream identifier given the current
180/// `last_stream_id` watermark, returning `(issued_id, next_last_stream_id)`
181/// or `None` when the 31-bit space is exhausted.
182///
183/// RFC 9113 §5.1.1 reserves odd identifiers for clients and even identifiers
184/// for servers. Sōzu never server-pushes, so in practice this helper is
185/// called on the backend (client) side via [`ConnectionH2::new_stream_id`].
186/// The server branch is kept symmetrical so the behaviour is exercised by
187/// the unit tests and remains correct if push is ever enabled.
188///
189/// `last_stream_id` tracks the even "watermark" (2, 4, 6, ...). A client call
190/// issues `watermark - 1` (odd), a server call issues `watermark - 2` (even).
191/// The helper enforces two invariants:
192/// - the issued identifier never exceeds `STREAM_ID_MAX` (2³¹ - 1); and
193/// - the returned watermark is a valid starting point for the next call.
194///
195/// Exhaustion is reported with `None` to the caller, which must emit
196/// GOAWAY(NO_ERROR) and stop issuing new streams on this connection
197/// (see `start_stream` for the client-side drain path).
198pub(super) fn next_stream_id(
199    last_stream_id: StreamId,
200    is_client: bool,
201) -> Option<(StreamId, StreamId)> {
202    let next = last_stream_id.checked_add(2)?;
203    let issued = if is_client {
204        next.checked_sub(1)?
205    } else {
206        next.checked_sub(2)?
207    };
208    // RFC 9113 §5.1.1: stream identifiers are 31-bit. Reject any allocation
209    // whose issued value would exceed `STREAM_ID_MAX`; the watermark itself
210    // is allowed to sit at `STREAM_ID_MAX + 1` (the sentinel that fails the
211    // next call).
212    if issued > STREAM_ID_MAX {
213        return None;
214    }
215    Some((issued, next))
216}
217
/// Enlarged connection-level receive window: 1 MiB (1 048 576 bytes).
/// The RFC 9113 default is 65 535 bytes, which is too small for high-throughput
/// proxying and causes excessive WINDOW_UPDATE round-trips. 1 MiB matches the
/// initial window used by HAProxy, the h2 crate, and other production proxies.
const ENLARGED_CONNECTION_WINDOW: u32 = 1_048_576;

/// H2 client connection preface size: 24-byte magic + 9-byte SETTINGS frame header.
/// Only the frame *header* is counted; the SETTINGS payload that follows it
/// is not part of this size.
pub(super) const CLIENT_PREFACE_SIZE: usize = 24 + parser::FRAME_HEADER_SIZE;
226
// ── Flood Detection Thresholds (CVE mitigations) ────────────────────────────

/// Default maximum RST_STREAM frames per window (CVE-2023-44487 Rapid Reset + CVE-2019-9514)
const DEFAULT_MAX_RST_STREAM_PER_WINDOW: u32 = 100;
/// Hard lifetime cap on total RST_STREAM frames received on a single
/// connection (CVE-2023-44487 Rapid Reset).
///
/// The per-window counter half-decays, which allows a patient attacker to
/// sustain ~50 RST/sec indefinitely — each one costs the backend a request
/// that will be cancelled before any response work is produced. A lifetime
/// counter that never decays puts an absolute ceiling on that amplification
/// per connection. 10 000 is generous for legitimate traffic (months of
/// occasional client-side cancellations) but rapidly trips on the ~30/sec
/// abusive pace reported in the CVE-2023-44487 advisory (~5 minutes).
pub(super) const DEFAULT_MAX_RST_STREAM_LIFETIME: u64 = 10_000;
/// Hard lifetime cap on RST_STREAM frames received BEFORE the corresponding
/// backend response has started. These are the cheap-for-client /
/// expensive-for-us resets that characterise Rapid Reset: the client pays
/// one RST frame, we pay a round-trip to the backend plus request parsing.
/// A much lower ceiling kills the attack well before 10 000 lifetime total.
pub(super) const DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME: u64 = 50;
/// Absolute lifetime cap on **server-emitted** RST_STREAM frames on a single
/// connection (CVE-2025-8671 — "MadeYouReset"). Distinct from
/// [`DEFAULT_MAX_RST_STREAM_LIFETIME`] which caps *received* RSTs
/// (CVE-2023-44487 Rapid Reset).
///
/// MadeYouReset has the server talk itself into flooding: the attacker sends
/// legitimate-looking frames that force the server to emit RST_STREAM (content
/// -length mismatch, header parse error, rejected priority, zero-increment
/// `WINDOW_UPDATE` on an open stream, …). Each forced RST costs the server a
/// header-decode, kawa buffer setup and frame serialisation; uncapped, it
/// becomes the same class of DoS as Rapid Reset but with a flipped emission
/// direction.
///
/// 500 is conservative: legitimate traffic very rarely triggers a
/// server-initiated RST (aside from graceful `NoError` cancels which are not
/// counted), so crossing 500 on a single connection is a strong abuse signal.
pub(super) const DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME: u64 = 500;
/// Default maximum PING frames per window (CVE-2019-9512 Ping Flood)
const DEFAULT_MAX_PING_PER_WINDOW: u32 = 100;
/// Absolute lifetime cap on PING frames received on a single connection.
/// Mirrors the value of [`DEFAULT_MAX_RST_STREAM_LIFETIME`] (as a `u32`
/// rather than a `u64`) — generous for legitimate keep-alives but trips on
/// sustained low-rate abuse (CVE-2019-9512).
const DEFAULT_MAX_PING_LIFETIME: u32 = 10_000;
/// Default maximum SETTINGS frames per window (CVE-2019-9515 Settings Flood)
const DEFAULT_MAX_SETTINGS_PER_WINDOW: u32 = 50;
/// Absolute lifetime cap on SETTINGS frames received on a single connection.
/// Mirrors the value of [`DEFAULT_MAX_RST_STREAM_LIFETIME`] (as a `u32`
/// rather than a `u64`) — generous for legitimate renegotiations but trips
/// on sustained low-rate abuse (CVE-2019-9515).
const DEFAULT_MAX_SETTINGS_LIFETIME: u32 = 10_000;
/// Default maximum empty DATA frames per window (CVE-2019-9518 Empty Frames)
const DEFAULT_MAX_EMPTY_DATA_PER_WINDOW: u32 = 100;
/// Default maximum connection-level (stream 0) WINDOW_UPDATE frames per
/// sliding window. Non-zero stream-0 WINDOW_UPDATE frames are otherwise
/// uncounted by the generic glitch detector — a peer could burn proxy CPU by
/// sending millions of legal-looking stream-0 WINDOW_UPDATEs. Value mirrors
/// [`DEFAULT_MAX_EMPTY_DATA_PER_WINDOW`] / [`DEFAULT_MAX_PING_PER_WINDOW`] —
/// legitimate proxies only need a handful per second.
const DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW: u32 = 100;
/// Default maximum CONTINUATION frames per header block (CVE-2024-27316)
const DEFAULT_MAX_CONTINUATION_FRAMES: u32 = 20;
/// Maximum accumulated header block size across CONTINUATION frames
/// (64 KiB = 65 536 bytes).
pub(super) const MAX_HEADER_LIST_SIZE: usize = 65536;
/// Default maximum HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE)
/// accepted from the peer. 64 KiB is well above the RFC default of 4 KiB
/// while preventing a malicious peer from advertising up to 4 GiB.
const DEFAULT_MAX_HEADER_TABLE_SIZE: u32 = 65536;
/// Duration of the sliding window for rate-based flood counters (1 second).
const FLOOD_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(1);
/// Default maximum general anomaly count before triggering ENHANCE_YOUR_CALM
const DEFAULT_MAX_GLITCH_COUNT: u32 = 100;

/// RFC 9113 §5.1.2: threshold of `REFUSED_STREAM` emissions per
/// [`BACKPRESSURE_WINDOW_DURATION`] that triggers back-pressure — at this
/// point we halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` so the
/// peer throttles its request rate instead of paying the RST round-trip for
/// every new stream.
const BACKPRESSURE_REFUSAL_THRESHOLD: u32 = 50;
/// Sliding window used to detect refusal bursts for SETTINGS back-pressure
/// (60 seconds).
const BACKPRESSURE_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(60);
307
/// Configurable thresholds for H2 flood detection.
///
/// All values have safe defaults matching the compile-time constants.
/// When configured via listener config, `None` values fall back to these defaults.
///
/// [`H2FloodConfig::new`] floors every threshold at 1. The fields are public,
/// so a value assembled with a struct literal bypasses that floor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2FloodConfig {
    /// Maximum RST_STREAM frames per second window (CVE-2023-44487, CVE-2019-9514)
    pub max_rst_stream_per_window: u32,
    /// Maximum PING frames per second window (CVE-2019-9512)
    pub max_ping_per_window: u32,
    /// Maximum SETTINGS frames per second window (CVE-2019-9515)
    pub max_settings_per_window: u32,
    /// Maximum empty DATA frames per second window (CVE-2019-9518)
    pub max_empty_data_per_window: u32,
    /// Maximum connection-level (stream 0) WINDOW_UPDATE frames per sliding
    /// window. Caps the CPU cost of a peer sending a flood of non-zero
    /// stream-0 WINDOW_UPDATEs — each is individually legal so the generic
    /// glitch counter does not trip, yet millions per connection still burn
    /// server CPU parsing and updating the flow window.
    pub max_window_update_stream0_per_window: u32,
    /// Maximum CONTINUATION frames per header block (CVE-2024-27316)
    pub max_continuation_frames: u32,
    /// Maximum accumulated protocol anomalies before ENHANCE_YOUR_CALM
    pub max_glitch_count: u32,
    /// Absolute lifetime cap on RST_STREAM frames received on a single
    /// connection (CVE-2023-44487). Never decays — provides a ceiling the
    /// per-window counter cannot.
    pub max_rst_stream_lifetime: u64,
    /// Lifetime cap on "abusive" (pre-response-start) RST_STREAM frames —
    /// the Rapid Reset signature (CVE-2023-44487).
    pub max_rst_stream_abusive_lifetime: u64,
    /// Absolute lifetime cap on **server-emitted** RST_STREAM frames for this
    /// connection (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
    /// count — graceful cancels are exempt.
    pub max_rst_stream_emitted_lifetime: u64,
    /// Maximum accumulated HPACK-decoded header list size per request
    /// (SETTINGS_MAX_HEADER_LIST_SIZE, RFC 9113 §6.5.2).
    pub max_header_list_size: u32,
    /// Maximum HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE) accepted
    /// from the peer. Caps the value the peer advertises in SETTINGS frames to
    /// prevent unbounded HPACK encoder memory growth.
    pub max_header_table_size: u32,
}
351
352impl Default for H2FloodConfig {
353    fn default() -> Self {
354        Self {
355            max_rst_stream_per_window: DEFAULT_MAX_RST_STREAM_PER_WINDOW,
356            max_ping_per_window: DEFAULT_MAX_PING_PER_WINDOW,
357            max_settings_per_window: DEFAULT_MAX_SETTINGS_PER_WINDOW,
358            max_empty_data_per_window: DEFAULT_MAX_EMPTY_DATA_PER_WINDOW,
359            max_window_update_stream0_per_window: DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW,
360            max_continuation_frames: DEFAULT_MAX_CONTINUATION_FRAMES,
361            max_glitch_count: DEFAULT_MAX_GLITCH_COUNT,
362            max_rst_stream_lifetime: DEFAULT_MAX_RST_STREAM_LIFETIME,
363            max_rst_stream_abusive_lifetime: DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME,
364            max_rst_stream_emitted_lifetime: DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME,
365            max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
366            max_header_table_size: DEFAULT_MAX_HEADER_TABLE_SIZE,
367        }
368    }
369}
370
371impl H2FloodConfig {
372    /// Create a validated config, clamping all thresholds to at least 1.
373    /// Zero thresholds would cause immediate flood detection on any frame.
374    #[allow(clippy::too_many_arguments)]
375    pub fn new(
376        max_rst_stream_per_window: u32,
377        max_ping_per_window: u32,
378        max_settings_per_window: u32,
379        max_empty_data_per_window: u32,
380        max_window_update_stream0_per_window: u32,
381        max_continuation_frames: u32,
382        max_glitch_count: u32,
383        max_rst_stream_lifetime: u64,
384        max_rst_stream_abusive_lifetime: u64,
385        max_rst_stream_emitted_lifetime: u64,
386        max_header_list_size: u32,
387        max_header_table_size: u32,
388    ) -> Self {
389        Self {
390            max_rst_stream_per_window: max_rst_stream_per_window.max(1),
391            max_ping_per_window: max_ping_per_window.max(1),
392            max_settings_per_window: max_settings_per_window.max(1),
393            max_empty_data_per_window: max_empty_data_per_window.max(1),
394            max_window_update_stream0_per_window: max_window_update_stream0_per_window.max(1),
395            max_continuation_frames: max_continuation_frames.max(1),
396            max_glitch_count: max_glitch_count.max(1),
397            max_rst_stream_lifetime: max_rst_stream_lifetime.max(1),
398            max_rst_stream_abusive_lifetime: max_rst_stream_abusive_lifetime.max(1),
399            max_rst_stream_emitted_lifetime: max_rst_stream_emitted_lifetime.max(1),
400            max_header_list_size: max_header_list_size.max(1),
401            max_header_table_size: max_header_table_size.max(1),
402        }
403    }
404}
405
/// Default stream Vec shrink ratio: shrink when total > active * ratio.
const DEFAULT_STREAM_SHRINK_RATIO: u32 = 2;

/// Configurable H2 connection tuning parameters.
///
/// All values have safe defaults. When configured via listener config,
/// absent values fall back to compile-time defaults.
///
/// [`H2ConnectionConfig::new`] clamps each value into its safe range. The
/// fields are public, so a value assembled with a struct literal bypasses
/// that clamping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2ConnectionConfig {
    /// Connection-level receive window size in bytes (RFC 9113 §6.9.2).
    /// `new` clamps it to \[65535, 2^31-1\].
    pub initial_connection_window: u32,
    /// Maximum concurrent streams (SETTINGS_MAX_CONCURRENT_STREAMS).
    /// `new` clamps it to \[1, 10 000\].
    pub max_concurrent_streams: u32,
    /// Shrink threshold ratio for recycled stream slots.
    /// `new` raises it to at least 2.
    pub stream_shrink_ratio: u32,
}
422
423impl Default for H2ConnectionConfig {
424    fn default() -> Self {
425        Self {
426            initial_connection_window: ENLARGED_CONNECTION_WINDOW,
427            max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
428            stream_shrink_ratio: DEFAULT_STREAM_SHRINK_RATIO,
429        }
430    }
431}
432
433impl H2ConnectionConfig {
434    /// Create a validated config, clamping to safe bounds.
435    ///
436    /// - `initial_connection_window`: clamped to \[65535, 2^31-1\] per RFC 9113 §6.9
437    /// - `max_concurrent_streams`: minimum 1
438    /// - `stream_shrink_ratio`: minimum 2 (1 would defeat slot recycling)
439    pub fn new(
440        initial_connection_window: u32,
441        max_concurrent_streams: u32,
442        stream_shrink_ratio: u32,
443    ) -> Self {
444        let clamped_window =
445            initial_connection_window.clamp(DEFAULT_INITIAL_WINDOW_SIZE, FLOW_CONTROL_MAX_WINDOW);
446        if clamped_window != initial_connection_window {
447            warn!(
448                "{} h2_initial_connection_window {} clamped to [{}, {}]",
449                log_module_context!(),
450                initial_connection_window,
451                DEFAULT_INITIAL_WINDOW_SIZE,
452                FLOW_CONTROL_MAX_WINDOW
453            );
454        }
455        const MAX_SAFE_CONCURRENT_STREAMS: u32 = 10_000;
456        let clamped_streams = max_concurrent_streams.clamp(1, MAX_SAFE_CONCURRENT_STREAMS);
457        if max_concurrent_streams > MAX_SAFE_CONCURRENT_STREAMS {
458            error!(
459                "{} h2_max_concurrent_streams={} exceeds safe limit, clamped to {}",
460                log_module_context!(),
461                max_concurrent_streams,
462                MAX_SAFE_CONCURRENT_STREAMS
463            );
464        }
465        if clamped_streams != max_concurrent_streams
466            && max_concurrent_streams <= MAX_SAFE_CONCURRENT_STREAMS
467        {
468            warn!(
469                "{} h2_max_concurrent_streams {} clamped to minimum 1",
470                log_module_context!(),
471                max_concurrent_streams
472            );
473        }
474        let clamped_ratio = stream_shrink_ratio.max(2);
475        if clamped_ratio != stream_shrink_ratio {
476            warn!(
477                "{} h2_stream_shrink_ratio {} clamped to minimum 2",
478                log_module_context!(),
479                stream_shrink_ratio
480            );
481        }
482        Self {
483            initial_connection_window: clamped_window,
484            max_concurrent_streams: clamped_streams,
485            stream_shrink_ratio: clamped_ratio,
486        }
487    }
488
489    /// Create from optional config values, falling back to compile-time defaults.
490    /// Combines unwrap-or-default with validation clamping.
491    pub fn from_optional(
492        window: Option<u32>,
493        max_streams: Option<u32>,
494        shrink_ratio: Option<u32>,
495    ) -> Self {
496        let defaults = Self::default();
497        Self::new(
498            window.unwrap_or(defaults.initial_connection_window),
499            max_streams.unwrap_or(defaults.max_concurrent_streams),
500            shrink_ratio.unwrap_or(defaults.stream_shrink_ratio),
501        )
502    }
503}
504
/// Default pending WINDOW_UPDATE capacity (used in tests).
/// The actual per-connection cap is computed from `connection_config.max_concurrent_streams`.
/// Computed here as `1 + DEFAULT_MAX_CONCURRENT_STREAMS * 4`.
#[cfg(test)]
const DEFAULT_MAX_PENDING_WINDOW_UPDATES: usize = 1 + DEFAULT_MAX_CONCURRENT_STREAMS as usize * 4;

/// Maximum number of pending RST_STREAM frames before triggering GOAWAY.
/// When a peer causes excessive RST_STREAM queueing (e.g. rapid stream creation
/// beyond MAX_CONCURRENT_STREAMS), this cap prevents unbounded memory growth
/// and triggers an ENHANCE_YOUR_CALM connection error.
const MAX_PENDING_RST_STREAMS: usize = 200;

/// RFC 9113 §6.5: maximum time (5 seconds) to wait for SETTINGS ACK before
/// sending GOAWAY with SETTINGS_TIMEOUT error code.
const SETTINGS_ACK_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5);
519
520#[inline(always)]
521fn error_nom_to_h2(error: nom::Err<parser::ParserError>) -> H2Error {
522    match error {
523        nom::Err::Error(parser::ParserError {
524            kind: parser::ParserErrorKind::H2(e),
525            ..
526        }) => e,
527        nom::Err::Failure(parser::ParserError {
528            kind: parser::ParserErrorKind::H2(e),
529            ..
530        }) => e,
531        _ => H2Error::ProtocolError,
532    }
533}
534
535/// Distribute connection-level byte overhead proportionally to a single stream.
536///
537/// Overhead is distributed in proportion to the bytes this stream transferred
538/// relative to the total across all active streams. A stream that transferred
539/// 60% of total bytes gets 60% of the overhead.
540///
541/// `stream_bytes` and `total_bytes` are `(bytes_in, bytes_out)` tuples.
542/// Falls back to even distribution (1/active_streams) when no stream has
543/// transferred any bytes yet (total is zero).
544///
545/// Extracted as a free function to avoid borrow conflicts when `self` fields
546/// (e.g. `encoder`) are borrowed by the converter while we need to update
547/// per-stream metrics and connection overhead counters.
548fn distribute_overhead(
549    metrics: &mut SessionMetrics,
550    overhead_bin: &mut usize,
551    overhead_bout: &mut usize,
552    stream_bytes: (usize, usize),
553    total_bytes: (usize, usize),
554    active_streams: usize,
555    is_last_stream: bool,
556) {
557    let share_in = if is_last_stream {
558        // Last stream gets all remaining overhead to avoid losing remainder bytes
559        // from integer division across earlier streams.
560        *overhead_bin
561    } else if total_bytes.0 > 0 {
562        // Clamp to remaining overhead — integer division rounding across multiple
563        // streams can cause accumulated shares to exceed the total.
564        (*overhead_bin * stream_bytes.0 / total_bytes.0).min(*overhead_bin)
565    } else {
566        // No stream has transferred any inbound bytes — fall back to even split.
567        *overhead_bin / active_streams.max(1)
568    };
569    let share_out = if is_last_stream {
570        *overhead_bout
571    } else if total_bytes.1 > 0 {
572        (*overhead_bout * stream_bytes.1 / total_bytes.1).min(*overhead_bout)
573    } else {
574        // No stream has transferred any outbound bytes — fall back to even split.
575        *overhead_bout / active_streams.max(1)
576    };
577    metrics.bin += share_in;
578    metrics.bout += share_out;
579    *overhead_bin -= share_in;
580    *overhead_bout -= share_out;
581}
582
583/// LIFECYCLE §9 invariant 16 probe: returns `true` if any open stream still
584/// has outbound kawa bytes queued (`back.out` non-empty or `back.blocks`
585/// non-drained).
586///
587/// Used by `finalize_write` to preserve `Ready::WRITABLE` across a voluntary
588/// scheduler yield, and by `has_pending_write_full` to block shutdown-drain
589/// while bytes are still owed to the frontend.
590///
591/// `.get()` rather than direct indexing: an unknown `GlobalStreamId` is
592/// treated as "no pending bytes" rather than panicking — defence-in-depth
593/// against a stream-removal race during shutdown.
594fn any_stream_has_pending_back(
595    streams: &HashMap<StreamId, GlobalStreamId>,
596    context_streams: &[Stream],
597) -> bool {
598    any_stream_id_matches(streams, |gid| {
599        context_streams
600            .get(gid)
601            .is_some_and(|s| !s.back.out.is_empty() || !s.back.blocks.is_empty())
602    })
603}
604
605/// Iteration core of [`any_stream_has_pending_back`], split out so the
606/// invariant-16 dispatch is unit-testable without a full [`Stream`] fixture
607/// (the existing test module only covers `H2FloodDetector`).
608fn any_stream_id_matches<F>(streams: &HashMap<StreamId, GlobalStreamId>, mut probe: F) -> bool
609where
610    F: FnMut(GlobalStreamId) -> bool,
611{
612    streams.values().any(|gid| probe(*gid))
613}
614
615/// Core of [`ConnectionH2::enqueue_rst`], extracted so the RST-queueing
616/// semantics (dedupe, queued-cap counter bump, invariant-15 readiness rearm)
617/// can be unit-tested without building a full `ConnectionH2<Front>` fixture.
618///
619/// Invariants enforced:
620/// - **Dedupe** via `rst_sent`: at most one queued RST per wire stream id.
621///   `HashSet::insert` returns `false` when the id is already present; we
622///   short-circuit on that branch to keep `pending_rst_streams`,
623///   `total_rst_streams_queued` and the wire counts consistent.
624/// - **MadeYouReset queued cap** (`MAX_PENDING_RST_STREAMS`): each freshly
625///   queued RST bumps `total_rst_streams_queued`, which
626///   `flush_pending_control_frames` polices to escalate to
627///   `GOAWAY(ENHANCE_YOUR_CALM)` when exceeded.
628/// - **Invariant 15** (edge-triggered epoll): pair `Ready::WRITABLE` interest
629///   with the event bit so `writable()` is scheduled on the next tick.
630///
631/// Returns `true` when the RST was freshly queued, `false` when the
632/// stream was already in `rst_sent` (the caller asked to RST the same
633/// stream twice — a benign re-entrant idempotency, NOT a new wire
634/// emission). The boolean lets [`ConnectionH2::enqueue_rst`] account
635/// the RST only on the freshly-queued path so duplicate calls do not
636/// inflate the per-error counter or trip the MadeYouReset flood cap
637/// for frames that never reach the wire.
638fn enqueue_rst_into(
639    pending: &mut Vec<(StreamId, H2Error)>,
640    total: &mut usize,
641    rst_sent: &mut HashSet<StreamId>,
642    readiness: &mut Readiness,
643    wire_stream_id: StreamId,
644    error: H2Error,
645) -> bool {
646    if !rst_sent.insert(wire_stream_id) {
647        return false;
648    }
649    pending.push((wire_stream_id, error));
650    *total += 1;
651    readiness.arm_writable();
652    true
653}
654
/// Detail of a flood-threshold violation returned by
/// [`H2FloodDetector::check_flood`], [`H2FloodDetector::record_rst_lifetime`]
/// and [`H2FloodDetector::record_rst_emitted`].
///
/// Carrying `(reason, count, threshold)` lets the caller emit a session-scoped
/// log line with full context — the detector itself is connection-agnostic and
/// never logs.
#[derive(Debug, Clone, PartialEq)]
pub struct H2FloodViolation {
    /// HTTP/2 error code to emit on the GOAWAY. The three detector methods
    /// listed above all construct violations with
    /// [`H2Error::EnhanceYourCalm`].
    pub error: H2Error,
    /// Human-readable name of the counter that tripped (e.g. `"RST_STREAM"`).
    pub reason: &'static str,
    /// Statsd metric key emitted by [`ConnectionH2::handle_flood_violation`].
    /// Carried alongside `reason` so a single field maps to both the log line
    /// and the dashboard counter — adding a new violation kind requires
    /// choosing both at the construction site, preventing drift.
    pub metric_key: &'static str,
    /// Observed counter value at the moment of detection. `u32` counters are
    /// widened to `u64` when the violation is built.
    pub count: u64,
    /// Configured ceiling that was crossed. A violation is only reported when
    /// `count` is strictly greater than this value.
    pub threshold: u64,
}
677
/// Tracks per-connection frame rates to detect and mitigate H2 flood attacks.
///
/// Monitors RST_STREAM (CVE-2023-44487), PING (CVE-2019-9512), SETTINGS (CVE-2019-9515),
/// empty DATA (CVE-2019-9518), and CONTINUATION (CVE-2024-27316) flood patterns,
/// plus stream-0 WINDOW_UPDATE rate, a general glitch counter and
/// server-emitted RST_STREAM (CVE-2025-8671).
/// When any counter exceeds its threshold, `check_flood()` returns the violation
/// detail so callers can log with connection context before sending GOAWAY.
///
/// Three families of counters live here:
/// - *windowed* counters, halved by `maybe_reset_window` once the rate window
///   has elapsed;
/// - *per-header-block* counters (`continuation_count`,
///   `accumulated_header_size`), cleared by `reset_continuation`;
/// - *lifetime* counters (`total_*`), which are never decreased by any method
///   of this type.
///
/// Thresholds are configurable via [`H2FloodConfig`], with safe defaults matching
/// the original compile-time constants. Exception: `check_flood` compares the
/// lifetime PING and SETTINGS counters against the `DEFAULT_MAX_PING_LIFETIME`
/// and `DEFAULT_MAX_SETTINGS_LIFETIME` constants, not against a config field.
#[derive(Debug)]
pub struct H2FloodDetector {
    /// RST_STREAM frames received in current window (CVE-2023-44487 + CVE-2019-9514)
    pub(super) rst_stream_count: u32,
    /// Lifetime RST_STREAM frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing a sustained ~50 RST/sec burst
    /// from running forever. Bumped by `record_rst_lifetime`.
    pub(super) total_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames received that targeted a stream whose
    /// backend response had not yet started. These are the "Rapid Reset"
    /// signature — cheap for the attacker, expensive for the proxy — and
    /// trip on a much lower ceiling than the generic lifetime counter.
    /// Bumped by `record_rst_lifetime` when `response_started` is `false`.
    pub(super) total_abusive_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames **emitted by the server** on this
    /// connection (CVE-2025-8671 "MadeYouReset" mitigation). Incremented
    /// by `record_rst_emitted`, whose documented call sites are the error
    /// paths of [`ConnectionH2::reset_stream`] where a non-`NoError` reset
    /// is triggered by an attacker-crafted frame (content-length mismatch,
    /// header parse error, priority rejection, zero-increment WINDOW_UPDATE
    /// on an open stream). Never decays — provides an absolute ceiling that
    /// short-circuits patient-attacker patterns that stay under any windowed
    /// counter.
    pub(super) total_rst_streams_emitted_lifetime: u64,
    /// PING frames received in current window (CVE-2019-9512)
    pub(super) ping_count: u32,
    /// Lifetime PING frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate PING abuse.
    /// Checked against `DEFAULT_MAX_PING_LIFETIME` in `check_flood`.
    pub(super) total_ping_received_lifetime: u32,
    /// SETTINGS frames received in current window (CVE-2019-9515)
    pub(super) settings_count: u32,
    /// Lifetime SETTINGS frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate SETTINGS abuse.
    /// Checked against `DEFAULT_MAX_SETTINGS_LIFETIME` in `check_flood`.
    pub(super) total_settings_received_lifetime: u32,
    /// Empty DATA frames received in current window (CVE-2019-9518)
    pub(super) empty_data_count: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames received in current
    /// sliding window. Half-decays with `maybe_reset_window` like other
    /// rate counters. Increments on non-zero stream-0 WINDOW_UPDATEs only —
    /// zero-increment frames short-circuit into GOAWAY(PROTOCOL_ERROR) per
    /// RFC 9113 §6.9 before reaching this counter.
    pub(super) window_update_stream0_count: u32,
    /// CONTINUATION frames received for current header block (CVE-2024-27316).
    /// Not touched by the window decay; cleared by `reset_continuation`.
    pub(super) continuation_count: u32,
    /// Total accumulated header block size across CONTINUATION frames.
    /// Not touched by the window decay; cleared by `reset_continuation`.
    pub(super) accumulated_header_size: u32,
    /// General anomaly counter. Half-decays with `maybe_reset_window`.
    pub(super) glitch_count: u32,
    /// Window start for rate-based counters. Reset to "now" each time the
    /// windowed counters are halved.
    pub(super) window_start: Instant,
    /// Configurable thresholds for flood detection
    pub(super) config: H2FloodConfig,
}
744
745impl Default for H2FloodDetector {
746    fn default() -> Self {
747        Self::new(H2FloodConfig::default())
748    }
749}
750
751impl H2FloodDetector {
752    pub fn new(config: H2FloodConfig) -> Self {
753        Self {
754            rst_stream_count: 0,
755            total_rst_received_lifetime: 0,
756            total_abusive_rst_received_lifetime: 0,
757            total_rst_streams_emitted_lifetime: 0,
758            ping_count: 0,
759            total_ping_received_lifetime: 0,
760            settings_count: 0,
761            total_settings_received_lifetime: 0,
762            empty_data_count: 0,
763            window_update_stream0_count: 0,
764            continuation_count: 0,
765            accumulated_header_size: 0,
766            glitch_count: 0,
767            window_start: Instant::now(),
768            config,
769        }
770    }
771
772    /// Increment the lifetime RST_STREAM counters and return a
773    /// [`H2FloodViolation`] if either the global or the abusive
774    /// (pre-response-start) lifetime cap has been exceeded.
775    ///
776    /// `response_started` indicates whether the backend response had already
777    /// begun when the RST arrived; `false` is the cheap-for-client /
778    /// expensive-for-us Rapid Reset signature (CVE-2023-44487).
779    pub fn record_rst_lifetime(&mut self, response_started: bool) -> Option<H2FloodViolation> {
780        self.total_rst_received_lifetime = self.total_rst_received_lifetime.saturating_add(1);
781        if !response_started {
782            self.total_abusive_rst_received_lifetime =
783                self.total_abusive_rst_received_lifetime.saturating_add(1);
784        }
785        if self.total_rst_received_lifetime > self.config.max_rst_stream_lifetime {
786            return Some(H2FloodViolation {
787                error: H2Error::EnhanceYourCalm,
788                reason: "Rapid Reset: lifetime RST_STREAM",
789                metric_key: "h2.flood.violation.rst_stream_lifetime",
790                count: self.total_rst_received_lifetime,
791                threshold: self.config.max_rst_stream_lifetime,
792            });
793        }
794        if self.total_abusive_rst_received_lifetime > self.config.max_rst_stream_abusive_lifetime {
795            return Some(H2FloodViolation {
796                error: H2Error::EnhanceYourCalm,
797                reason: "Rapid Reset: lifetime pre-response RST_STREAM",
798                metric_key: "h2.flood.violation.rst_stream_pre_response_lifetime",
799                count: self.total_abusive_rst_received_lifetime,
800                threshold: self.config.max_rst_stream_abusive_lifetime,
801            });
802        }
803        None
804    }
805
806    /// Increment the lifetime **server-emitted** RST_STREAM counter and
807    /// return a [`H2FloodViolation`] once the configured ceiling is exceeded.
808    ///
809    /// Call sites are the error paths inside [`ConnectionH2::reset_stream`]
810    /// where an attacker-crafted frame coerces the server into emitting a
811    /// RST_STREAM (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
812    /// are reported — callers must exclude graceful cancels.
813    pub fn record_rst_emitted(&mut self) -> Option<H2FloodViolation> {
814        self.total_rst_streams_emitted_lifetime =
815            self.total_rst_streams_emitted_lifetime.saturating_add(1);
816        if self.total_rst_streams_emitted_lifetime > self.config.max_rst_stream_emitted_lifetime {
817            return Some(H2FloodViolation {
818                error: H2Error::EnhanceYourCalm,
819                reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
820                metric_key: "h2.flood.violation.rst_stream_emitted_lifetime",
821                count: self.total_rst_streams_emitted_lifetime,
822                threshold: self.config.max_rst_stream_emitted_lifetime,
823            });
824        }
825        None
826    }
827
828    /// Half-decay rate-based counters if the current window has expired.
829    /// Uses half-window decay instead of full reset to catch burst-then-wait attacks.
830    fn maybe_reset_window(&mut self) {
831        if self.window_start.elapsed() >= FLOOD_WINDOW_DURATION {
832            self.rst_stream_count /= 2;
833            self.ping_count /= 2;
834            self.settings_count /= 2;
835            self.empty_data_count /= 2;
836            self.window_update_stream0_count /= 2;
837            self.glitch_count /= 2;
838            self.window_start = Instant::now();
839        }
840    }
841
842    /// Check all flood counters. Returns a [`H2FloodViolation`] when a threshold
843    /// is exceeded; the caller is responsible for logging with session context
844    /// and escalating to GOAWAY.
845    pub fn check_flood(&mut self) -> Option<H2FloodViolation> {
846        self.maybe_reset_window();
847
848        fn flag(
849            reason: &'static str,
850            metric_key: &'static str,
851            count: u32,
852            threshold: u32,
853        ) -> Option<H2FloodViolation> {
854            if count > threshold {
855                Some(H2FloodViolation {
856                    error: H2Error::EnhanceYourCalm,
857                    reason,
858                    metric_key,
859                    count: count as u64,
860                    threshold: threshold as u64,
861                })
862            } else {
863                None
864            }
865        }
866
867        flag(
868            "RST_STREAM",
869            "h2.flood.violation.rst_stream_window",
870            self.rst_stream_count,
871            self.config.max_rst_stream_per_window,
872        )
873        .or_else(|| {
874            flag(
875                "PING",
876                "h2.flood.violation.ping_window",
877                self.ping_count,
878                self.config.max_ping_per_window,
879            )
880        })
881        .or_else(|| {
882            flag(
883                "PING lifetime",
884                "h2.flood.violation.ping_lifetime",
885                self.total_ping_received_lifetime,
886                DEFAULT_MAX_PING_LIFETIME,
887            )
888        })
889        .or_else(|| {
890            flag(
891                "SETTINGS",
892                "h2.flood.violation.settings_window",
893                self.settings_count,
894                self.config.max_settings_per_window,
895            )
896        })
897        .or_else(|| {
898            flag(
899                "SETTINGS lifetime",
900                "h2.flood.violation.settings_lifetime",
901                self.total_settings_received_lifetime,
902                DEFAULT_MAX_SETTINGS_LIFETIME,
903            )
904        })
905        .or_else(|| {
906            flag(
907                "empty DATA",
908                "h2.flood.violation.empty_data_window",
909                self.empty_data_count,
910                self.config.max_empty_data_per_window,
911            )
912        })
913        .or_else(|| {
914            flag(
915                "CONTINUATION",
916                "h2.flood.violation.continuation_per_block",
917                self.continuation_count,
918                self.config.max_continuation_frames,
919            )
920        })
921        .or_else(|| {
922            flag(
923                "WINDOW_UPDATE stream 0",
924                "h2.flood.violation.window_update_stream0_window",
925                self.window_update_stream0_count,
926                self.config.max_window_update_stream0_per_window,
927            )
928        })
929        .or_else(|| {
930            flag(
931                "accumulated header size",
932                "h2.flood.violation.header_size_per_block",
933                self.accumulated_header_size,
934                self.config.max_header_list_size,
935            )
936        })
937        .or_else(|| {
938            flag(
939                "glitch",
940                "h2.flood.violation.glitch_window",
941                self.glitch_count,
942                self.config.max_glitch_count,
943            )
944        })
945    }
946
947    /// Reset CONTINUATION-specific counters when a header block is complete.
948    pub fn reset_continuation(&mut self) {
949        self.continuation_count = 0;
950        self.accumulated_header_size = 0;
951    }
952}
953
/// Connection state stored in `ConnectionH2::state`.
///
/// NOTE(review): the per-variant notes below are inferred from the variant
/// names and payload types only — the transitions are implemented outside
/// this declaration; confirm against the read/write state machine before
/// relying on them.
#[derive(Debug)]
pub enum H2State {
    /// Presumably: waiting for the client connection preface.
    ClientPreface,
    /// Presumably: waiting for the client's initial SETTINGS frame.
    ClientSettings,
    /// Presumably: our own SETTINGS still have to be sent/processed.
    ServerSettings,
    /// Presumably: waiting for the next frame header.
    Header,
    /// Carries the [`FrameHeader`] of the frame being handled; presumably the
    /// payload of that frame is what is expected next.
    Frame(FrameHeader),
    /// Carries a [`Headers`] value; presumably a header block is open and the
    /// header of a CONTINUATION frame is expected next.
    ContinuationHeader(Headers),
    /// Carries a [`Headers`] value; presumably the payload of a CONTINUATION
    /// frame for that header block is expected next.
    ContinuationFrame(Headers),
    /// Presumably: a GOAWAY has been decided or exchanged.
    GoAway,
    /// Presumably: the connection hit an unrecoverable error.
    Error,
    /// Presumably: incoming payload is being discarded.
    Discard,
}
967
/// One endpoint's view of the HTTP/2 SETTINGS parameters.
///
/// `ConnectionH2` keeps two instances: `local_settings` and `peer_settings`.
/// Field names mirror the SETTINGS identifiers of RFC 9113 §6.5.2 and of the
/// two extension RFCs noted on the last two fields.
#[derive(Debug, Clone, Copy)]
pub struct H2Settings {
    /// SETTINGS_HEADER_TABLE_SIZE: HPACK dynamic table size, in octets.
    pub settings_header_table_size: u32,
    /// SETTINGS_ENABLE_PUSH: whether server push is permitted.
    pub settings_enable_push: bool,
    /// SETTINGS_MAX_CONCURRENT_STREAMS: concurrent stream limit.
    pub settings_max_concurrent_streams: u32,
    /// SETTINGS_INITIAL_WINDOW_SIZE: initial stream-level flow-control window.
    pub settings_initial_window_size: u32,
    /// SETTINGS_MAX_FRAME_SIZE: largest frame payload accepted.
    pub settings_max_frame_size: u32,
    /// SETTINGS_MAX_HEADER_LIST_SIZE: advisory ceiling on header list size.
    pub settings_max_header_list_size: u32,
    /// RFC 8441 (SETTINGS_ENABLE_CONNECT_PROTOCOL).
    pub settings_enable_connect_protocol: bool,
    /// RFC 9218 (SETTINGS_NO_RFC7540_PRIORITIES).
    pub settings_no_rfc7540_priorities: bool,
}
981
982impl Default for H2Settings {
983    fn default() -> Self {
984        Self {
985            settings_header_table_size: DEFAULT_HEADER_TABLE_SIZE,
986            settings_enable_push: false,
987            settings_max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
988            settings_initial_window_size: DEFAULT_INITIAL_WINDOW_SIZE,
989            settings_max_frame_size: DEFAULT_MAX_FRAME_SIZE,
990            settings_max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
991            settings_enable_connect_protocol: false,
992            settings_no_rfc7540_priorities: true,
993        }
994    }
995}
996
/// RFC 9218 Extensible Priorities for HTTP stream scheduling.
///
/// Stores per-stream urgency (0-7, lower = more important) and incremental
/// flag. Used by `writable()` to sort streams: lower urgency first, then
/// stream ID for stability among same-urgency non-incremental streams.
///
/// Within a same-urgency bucket the scheduler (see
/// [`ConnectionH2::write_streams`]) drains non-incremental streams
/// sequentially, then applies RFC 9218 §4 round-robin to the incremental
/// streams starting from [`Self::incremental_cursor`], so multiple concurrent
/// downloads at the same urgency interleave their DATA frames fairly.
///
/// Streams without an explicit `priority` header get the RFC 9218 defaults:
/// urgency 3, incremental false.
///
/// The `Default` value is an empty priority map with the cursor at its `0`
/// ("no cursor yet") sentinel.
#[derive(Default)]
pub struct Prioriser {
    /// Per-stream priority: stream_id -> (urgency 0-7, incremental flag).
    /// Only RFC 9218 priorities are stored; the number of entries is bounded
    /// by `MAX_PRIORITIES`, and entries are dropped through
    /// [`Self::remove`].
    priorities: HashMap<StreamId, (u8, bool)>,
    /// RFC 9218 §4 round-robin cursor: stream ID that fired first in the
    /// last write pass over the incremental tail of the lowest-urgency
    /// bucket that contained at least one incremental stream. The next pass
    /// starts from the stream immediately after this ID (wrapping around),
    /// so a single slow-draining stream cannot hog the connection.
    ///
    /// `0` is the "no cursor yet" sentinel and means "start from the
    /// smallest ID in the bucket" — H2 stream IDs are always > 0.
    ///
    /// Written only by [`Self::advance_incremental_cursor`]; read by
    /// [`Self::apply_incremental_rotation`].
    incremental_cursor: StreamId,
}
1025
/// RFC 9218 §4 default urgency value. Returned by [`Prioriser::get`]
/// (together with `incremental = false`) for streams that have no entry.
const DEFAULT_URGENCY: u8 = 3;

/// Maximum entries in the priority map to prevent flooding via PRIORITY frames.
/// Enforced in [`Prioriser::push_priority`]: once the map holds this many
/// entries, priorities for stream IDs that are not already present are
/// dropped, while updates to existing entries are still accepted.
const MAX_PRIORITIES: usize = 4096;

/// Small look-ahead window (in stream IDs) for PRIORITY frames that arrive
/// slightly before the peer opens the corresponding stream. RFC 9218 allows
/// PRIORITY to be sent for an idle stream that the peer intends to open
/// soon. Past this budget we assume the ID will never be used and drop the
/// entry, preventing flooding with far-future stream IDs.
///
/// The accepted idle range is `last_stream_id < id <= last_stream_id + 64`,
/// with the upper bound computed by a saturating addition.
const PRIORITY_IDLE_LOOKAHEAD: u32 = 64;
1038
1039impl Prioriser {
1040    /// Record or update the priority for a stream that we know exists or are
1041    /// currently processing (used from pkawa's header-handling path where the
1042    /// owning stream's HEADERS frame is being decoded).
1043    ///
1044    /// Returns `true` if the priority is invalid (self-dependency for RFC 7540),
1045    /// signalling the caller should reset the stream with a protocol error.
1046    pub fn push_priority(&mut self, stream_id: StreamId, priority: parser::PriorityPart) -> bool {
1047        trace!(
1048            "{} PRIORITY REQUEST FOR {}: {:?}",
1049            log_module_context!(),
1050            stream_id,
1051            priority
1052        );
1053        // Cap the priority map to prevent flooding via PRIORITY frames
1054        if !self.priorities.contains_key(&stream_id) && self.priorities.len() >= MAX_PRIORITIES {
1055            return false;
1056        }
1057        match priority {
1058            parser::PriorityPart::Rfc7540 {
1059                stream_dependency,
1060                weight: _,
1061            } => {
1062                // RFC 9113 §5.3.1: a stream cannot depend on itself; signal
1063                // the caller to RST_STREAM with PROTOCOL_ERROR. Otherwise the
1064                // RFC 7540 priority tree is deprecated and silently ignored.
1065                stream_dependency.stream_id == stream_id
1066            }
1067            parser::PriorityPart::Rfc9218 {
1068                urgency,
1069                incremental,
1070            } => {
1071                // RFC 9218 §7.1: a malformed or out-of-range priority field
1072                // MUST be "treated as absent", NOT as a stream error. Clamping
1073                // an urgency > 7 to 7 is the policy-correct interpretation:
1074                // the field is still present (so defaulting would lose
1075                // information) but its value is normalised to the RFC's
1076                // allowed range [0..=7]. Intentionally not PROTOCOL_ERROR.
1077                self.priorities
1078                    .insert(stream_id, (urgency.min(7), incremental));
1079                false
1080            }
1081        }
1082    }
1083
1084    /// Record or update the priority for a stream ID that arrived via a
1085    /// standalone PRIORITY frame.
1086    ///
1087    /// Pass 3 Medium #4: without this guard, a peer could send PRIORITY for
1088    /// arbitrary stream IDs (e.g. 2^31 ever-increasing IDs) and pin up to
1089    /// `MAX_PRIORITIES` entries of memory. Accept only:
1090    /// - an ID that corresponds to a currently-open stream (`open_streams`);
1091    /// - an idle ID slightly ahead of `last_stream_id` (within
1092    ///   [`PRIORITY_IDLE_LOOKAHEAD`]), matching RFC 9218's "set priority for
1093    ///   a stream about to be opened" pattern.
1094    ///
1095    /// IDs in the past that we do not currently track (already closed) and
1096    /// IDs too far in the future are silently dropped. The `MAX_PRIORITIES`
1097    /// ceiling is preserved as a defensive backstop if both filters are ever
1098    /// circumvented.
1099    ///
1100    /// Returns the same value semantics as [`Self::push_priority`].
1101    pub fn push_priority_guarded(
1102        &mut self,
1103        stream_id: StreamId,
1104        priority: parser::PriorityPart,
1105        last_stream_id: StreamId,
1106        open_streams: &HashMap<StreamId, GlobalStreamId>,
1107    ) -> bool {
1108        if !self.is_acceptable(stream_id, last_stream_id, open_streams) {
1109            trace!(
1110                "{} PRIORITY dropped for unknown/far stream {} (last_stream_id={})",
1111                log_module_context!(),
1112                stream_id,
1113                last_stream_id
1114            );
1115            return false;
1116        }
1117        self.push_priority(stream_id, priority)
1118    }
1119
1120    fn is_acceptable(
1121        &self,
1122        stream_id: StreamId,
1123        last_stream_id: StreamId,
1124        open_streams: &HashMap<StreamId, GlobalStreamId>,
1125    ) -> bool {
1126        if open_streams.contains_key(&stream_id) {
1127            return true;
1128        }
1129        // Idle stream ahead of the current counter: accept a small look-ahead.
1130        // Past IDs that are NOT in `open_streams` are closed — drop them.
1131        let upper = last_stream_id.saturating_add(PRIORITY_IDLE_LOOKAHEAD);
1132        stream_id > last_stream_id && stream_id <= upper
1133    }
1134
1135    /// Remove a stream's priority entry (called when the stream is recycled).
1136    pub fn remove(&mut self, stream_id: &StreamId) {
1137        self.priorities.remove(stream_id);
1138    }
1139
1140    /// Look up the priority for a stream, returning RFC 9218 defaults if absent.
1141    #[inline]
1142    pub fn get(&self, stream_id: &StreamId) -> (u8, bool) {
1143        self.priorities
1144            .get(stream_id)
1145            .copied()
1146            .unwrap_or((DEFAULT_URGENCY, false))
1147    }
1148
1149    /// Reorder a pre-sorted slice of writable stream IDs so that inside each
1150    /// urgency bucket, incremental streams appear after non-incremental ones,
1151    /// and the incremental tail is rotated by [`Self::incremental_cursor`]
1152    /// (RFC 9218 §4).
1153    ///
1154    /// The input `buf` must already be sorted by `(urgency, stream_id)`:
1155    /// this routine only partitions and rotates inside same-urgency
1156    /// contiguous runs, it does not re-sort.
1157    ///
1158    /// Returns the total number of incremental streams seen, so callers that
1159    /// need to update the cursor at the end of the write pass can early-exit
1160    /// when the count is zero.
1161    pub fn apply_incremental_rotation(&self, buf: &mut [StreamId]) -> usize {
1162        let mut total_incremental = 0usize;
1163        let mut i = 0;
1164        while i < buf.len() {
1165            let (urgency_i, _) = self.get(&buf[i]);
1166            let mut j = i + 1;
1167            while j < buf.len() {
1168                let (urgency_j, _) = self.get(&buf[j]);
1169                if urgency_j != urgency_i {
1170                    break;
1171                }
1172                j += 1;
1173            }
1174            // `buf[i..j]` is a contiguous run of same-urgency stream IDs.
1175            let bucket = &mut buf[i..j];
1176            if bucket.len() > 1 {
1177                // Stable partition: non-incremental first, incremental last,
1178                // each subrange staying in ascending stream-id order.
1179                bucket.sort_by_key(|id| self.get(id).1);
1180                let split = bucket.partition_point(|id| !self.get(id).1);
1181                let incremental_tail = &mut bucket[split..];
1182                if incremental_tail.len() > 1 {
1183                    // Rotate so the pass starts right after the stream that
1184                    // fired first previously. `partition_point` returns the
1185                    // first index whose stream ID > cursor (so cursor itself
1186                    // is still drained, but after the streams ahead of it).
1187                    let start =
1188                        incremental_tail.partition_point(|id| *id <= self.incremental_cursor);
1189                    incremental_tail.rotate_left(start);
1190                }
1191                total_incremental += incremental_tail.len();
1192            } else if bucket.len() == 1 && self.get(&bucket[0]).1 {
1193                total_incremental += 1;
1194            }
1195            i = j;
1196        }
1197        total_incremental
1198    }
1199
1200    /// Advance the RFC 9218 §4 round-robin cursor after a write pass.
1201    ///
1202    /// `first_incremental_fired` is the stream ID that headed the incremental
1203    /// tail we just drained; the next pass will start at the next stream
1204    /// after that ID. Callers may pass `None` when no incremental streams
1205    /// were eligible, leaving the cursor where it was.
1206    pub fn advance_incremental_cursor(&mut self, first_incremental_fired: Option<StreamId>) {
1207        if let Some(id) = first_incremental_fired {
1208            self.incremental_cursor = id;
1209        }
1210    }
1211}
1212
/// Connection-level flow control state (RFC 9113 §6.9).
///
/// Held by `ConnectionH2` in its `flow_control` field.
pub struct H2FlowControl {
    /// Connection-level send window (can go negative per RFC 9113 §6.9.2),
    /// hence the signed type.
    pub window: i32,
    /// Bytes received since last connection-level WINDOW_UPDATE.
    pub received_bytes_since_update: u32,
    /// Queued stream_id -> accumulated increment for WINDOW_UPDATE frames (O(1) coalescing).
    /// NOTE(review): the entry-count bound appears to live on
    /// `ConnectionH2::max_pending_window_updates`, not on this map — confirm
    /// at the insertion sites.
    pub pending_window_updates: HashMap<u32, u32>,
}
1222
/// Byte accounting for connection overhead attribution.
///
/// Held by `ConnectionH2` in its `bytes` field. All three counters are plain
/// byte counts.
/// NOTE(review): the overhead counters are presumably drained into per-stream
/// `SessionMetrics` by the overhead-distribution helper in this module —
/// confirm against its callers.
pub struct H2ByteAccounting {
    /// Bytes read on the zero stream not yet attributed to a stream.
    pub zero_bytes_read: usize,
    /// Overhead bytes received (connection-level frames).
    pub overhead_bin: usize,
    /// Overhead bytes sent (connection-level frames).
    pub overhead_bout: usize,
}
1232
/// Connection draining state for graceful shutdown.
///
/// Held by `ConnectionH2` in its `drain` field. `started_at` and
/// `graceful_shutdown_deadline` together describe the forced-close timer:
/// the first is when the proxy-initiated drain began, the second is how long
/// in-flight streams are given from that point.
pub struct H2DrainState {
    /// True when we've sent GOAWAY and are draining.
    pub draining: bool,
    /// Last stream ID from peer's GOAWAY (for retry decisions).
    /// `None` until a peer GOAWAY has been recorded.
    pub peer_last_stream_id: Option<StreamId>,
    /// Wall-clock timestamp captured the first time this connection entered
    /// `draining` during soft-stop. Used together with
    /// [`Self::graceful_shutdown_deadline`] to decide when to force-close.
    /// Remains `None` until the proxy-initiated drain begins (peer-initiated
    /// drains via `handle_goaway_frame` don't arm the forced-close timer —
    /// the caller in `Mux::shutting_down` is the only writer).
    pub started_at: Option<Instant>,
    /// Wall-clock budget granted to in-flight streams after the initial
    /// `GOAWAY(NO_ERROR)`. `None` means "wait indefinitely" (knob value `0`).
    /// Default when unset upstream: 5 s (see `L7ListenerHandler`).
    pub graceful_shutdown_deadline: Option<std::time::Duration>,
}
1251
1252pub struct ConnectionH2<Front: SocketHandler> {
1253    /// Connection/session ULID propagated from the parent [`Mux`]. Used to
1254    /// stamp the session slot of the `[session req cluster backend]` log
1255    /// prefix emitted by this module's `log_context!` / `log_context_stream!`
1256    /// macros.
1257    pub session_ulid: Ulid,
1258    pub decoder: loona_hpack::Decoder<'static>,
1259    pub encoder: loona_hpack::Encoder<'static>,
1260    pub expect_read: Option<(H2StreamId, usize)>,
1261    pub expect_write: Option<H2StreamId>,
1262    pub last_stream_id: StreamId,
1263    pub local_settings: H2Settings,
1264    pub peer_settings: H2Settings,
1265    pub position: Position,
1266    pub prioriser: Prioriser,
1267    pub readiness: Readiness,
1268    pub socket: Front,
1269    pub state: H2State,
1270    pub streams: HashMap<StreamId, GlobalStreamId>,
1271    pub timeout_container: TimeoutContainer,
1272    /// Connection-level flow control state (send window, receive tracking, pending updates).
1273    pub flow_control: H2FlowControl,
1274    /// Highest stream ID accepted from the peer (used for GoAway last_stream_id).
1275    pub highest_peer_stream_id: StreamId,
1276    /// RFC 7541 §4.2 / §6.3 pending dynamic-table-size-update signal.
1277    ///
1278    /// `Some(new_size)` when a peer SETTINGS frame adjusted
1279    /// `SETTINGS_HEADER_TABLE_SIZE` and we have not yet prepended the
1280    /// matching `001xxxxx` HPACK directive to a header block. Consumed and
1281    /// cleared by [`H2BlockConverter::emit_pending_size_update_if_new_block`]
1282    /// on the next `Block::StatusLine` or `Block::Header` encoded for the
1283    /// connection. Until then the peer's decoder still has its previous
1284    /// (possibly larger) table cap, so emitting is a correctness
1285    /// requirement, not a nicety — see the RFC 9113 encoder-decoder
1286    /// synchronisation contract (§6.5.2).
1287    pub pending_table_size_update: Option<u32>,
1288    /// Reusable buffer for HPACK-encoded headers in the H2 block converter.
1289    pub converter_buf: Vec<u8>,
1290    /// Reusable buffer for lowercasing header keys in the H2 block converter.
1291    pub lowercase_buf: Vec<u8>,
1292    /// Reusable buffer for assembling cookie values in the H2 block converter.
1293    pub cookie_buf: Vec<u8>,
1294    /// Connection draining state for graceful shutdown.
1295    pub drain: H2DrainState,
1296    pub zero: GenericHttpStream,
1297    /// Byte accounting for connection overhead attribution.
1298    pub bytes: H2ByteAccounting,
1299    /// Flood detector for CVE mitigations (Rapid Reset, CONTINUATION, Ping, Settings floods).
1300    pub flood_detector: H2FloodDetector,
1301    /// RFC 9113 §6.5: timestamp when we sent SETTINGS and are awaiting ACK.
1302    /// If the peer does not ACK within SETTINGS_ACK_TIMEOUT, we send GOAWAY
1303    /// with SettingsTimeout error.
1304    pub settings_sent_at: Option<Instant>,
1305    /// Queued RST_STREAM frames to send: Vec<(stream_id, error_code)>.
1306    /// Used when refusing streams (MAX_CONCURRENT_STREAMS, buffer exhaustion)
1307    /// during readable — the actual write happens in the writable preamble
1308    /// to avoid conflicting with kawa.storage usage for frame payload discard.
1309    pub pending_rst_streams: Vec<(StreamId, H2Error)>,
1310    /// RFC 9113 §6.8: tracks stream IDs for which RST_STREAM has already been sent,
1311    /// preventing duplicate RST_STREAM frames on the wire.
1312    pub rst_sent: HashSet<StreamId>,
1313    /// Lifetime counter of RST_STREAM frames queued (pending + already flushed).
1314    /// Used to detect sustained misbehavior even when writable() drains the
1315    /// pending queue between readable() calls.
1316    pub total_rst_streams_queued: usize,
1317    /// Reusable buffer for priority-sorted stream IDs in write_streams().
1318    /// Cleared and reused each call to avoid per-frame allocation.
1319    priorities_buf: Vec<StreamId>,
1320    /// True once we've asked rustls to emit TLS close_notify for this frontend.
1321    close_notify_sent: bool,
1322    /// Per-listener H2 connection tuning (window size, max streams, shrink ratio).
1323    pub connection_config: H2ConnectionConfig,
1324    /// Maximum pending WINDOW_UPDATE entries before dropping.
1325    /// Derived from `connection_config.max_concurrent_streams` at construction.
1326    max_pending_window_updates: usize,
1327    /// Last `(connection_window, active_streams, pending_window_updates)` snapshot
1328    /// emitted by [`Self::gauge_connection_state`]. The snapshot represents this
1329    /// connection's *contribution* to the three `h2.connection.*` aggregate
1330    /// gauges; each call emits the signed delta against this snapshot via
1331    /// [`gauge_add!`] so the gauge sums across connections.
1332    ///
1333    /// Stays `None` until the first emission. [`Drop`] applies the negative of
1334    /// this snapshot so the connection's contribution is always rebalanced to
1335    /// zero on teardown — independent of which close path runs.
1336    last_gauge_snapshot: Option<(usize, usize, usize)>,
1337    /// Per-stream wall-clock timestamp of last meaningful activity (DATA or
1338    /// HEADERS frame receipt). Used to cancel streams that make no forward
1339    /// progress within [`Self::stream_idle_timeout`] — mitigates slow-multiplex
1340    /// Slowloris: connection-level idle timers reset on every frame, so a
1341    /// misbehaving peer can otherwise pin up to `max_concurrent_streams` slots
1342    /// for the full nominal connection timeout.
1343    ///
1344    /// Initialized when the stream is created and refreshed on each non-empty
1345    /// inbound DATA frame and on HEADERS for an existing stream (trailers).
1346    /// Empty DATA frames (CVE-2019-9518 vector) do NOT refresh the timer.
1347    pub stream_last_activity_at: HashMap<StreamId, Instant>,
1348    /// Per-stream idle cap. Streams with no activity for longer than this are
1349    /// RST_STREAM(CANCEL)'d by [`Self::cancel_timed_out_streams`].
1350    pub stream_idle_timeout: std::time::Duration,
1351    /// RFC 9113 §5.1.2 back-pressure: count of stream refusals
1352    /// (REFUSED_STREAM emitted via [`Self::refuse_stream_and_discard`]) within
1353    /// the current back-pressure window. When the count exceeds
1354    /// [`BACKPRESSURE_REFUSAL_THRESHOLD`] inside one
1355    /// [`BACKPRESSURE_WINDOW_DURATION`] we halve the advertised
1356    /// `SETTINGS_MAX_CONCURRENT_STREAMS` to signal the peer to slow down.
1357    refuse_count_window: u32,
1358    /// Start timestamp for the current back-pressure window.
1359    refuse_window_start: Instant,
1360    /// Set once we have halved `local_settings.settings_max_concurrent_streams`
1361    /// in response to a refusal burst. Prevents the cap from collapsing to 0
1362    /// on sustained abuse — a single halving per connection is sufficient to
1363    /// signal back-pressure; further bursts trigger `EnhanceYourCalm`.
1364    mcs_backpressure_applied: bool,
1365}
1366impl<Front: SocketHandler> std::fmt::Debug for ConnectionH2<Front> {
1367    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1368        f.debug_struct("ConnectionH2")
1369            .field("position", &self.position)
1370            .field("state", &self.state)
1371            .field("expect", &self.expect_read)
1372            .field("readiness", &self.readiness)
1373            .field("local_settings", &self.local_settings)
1374            .field("peer_settings", &self.peer_settings)
1375            .field("socket", &self.socket.socket_ref())
1376            .field("streams", &self.streams)
1377            .field("zero", &self.zero.storage.meter(20))
1378            .field("window", &self.flow_control.window)
1379            .field("total_rst_streams_queued", &self.total_rst_streams_queued)
1380            .finish()
1381    }
1382}
1383
1384/// Symmetric tear-down for the three `h2.connection.*` aggregate gauges:
1385/// whatever positive contribution this connection made via
1386/// [`ConnectionH2::gauge_connection_state`] is subtracted back out when the
1387/// connection is dropped.
1388///
1389/// Using `Drop` (rather than wiring decrements into every close path —
1390/// `graceful_goaway`, `force_disconnect`, `handle_goaway_frame`, `Mux::close`,
1391/// stream-id exhaustion, panic-unwind) is what guarantees the gauge is
1392/// arithmetically symmetric regardless of which path teardown took. Past
1393/// underflow incidents (commits a650ad69, d2f01ed4) have all been
1394/// missing-decrement bugs that `Drop` makes structurally impossible.
1395impl<Front: SocketHandler> Drop for ConnectionH2<Front> {
1396    fn drop(&mut self) {
1397        self.release_connection_gauges();
1398    }
1399}
1400
1401#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1402pub enum H2StreamId {
1403    Zero,
1404    Other { id: StreamId, gid: GlobalStreamId },
1405}
1406
1407impl<Front: SocketHandler> ConnectionH2<Front> {
1408    fn frontend_hung_up_while_draining(&self) -> bool {
1409        matches!(self.position, Position::Server)
1410            && self.drain.draining
1411            && (self.readiness.event.is_hup() || self.readiness.event.is_error())
1412    }
1413
1414    /// Once the final GOAWAY has been queued and all streams/control frames are
1415    /// gone, a peer-side HUP/ERR means any remaining rustls backlog is no
1416    /// longer deliverable. Waiting on `socket_wants_write()` in that state can
1417    /// deadlock shutdown forever because GOAWAY disables further frame reads.
1418    fn peer_gone_after_final_goaway(&self) -> bool {
1419        self.frontend_hung_up_while_draining()
1420            && matches!(self.state, H2State::GoAway | H2State::Error)
1421            && self.streams.is_empty()
1422            && self.expect_write.is_none()
1423            && self.zero.storage.is_empty()
1424    }
1425
1426    /// Shared constructor for both server and client H2 connections.
1427    ///
1428    /// Differences between server and client are captured by the caller-provided
1429    /// `position`, `expect_read`, and `readiness_interest` parameters.
1430    #[allow(clippy::too_many_arguments)]
1431    pub(super) fn new(
1432        session_ulid: Ulid,
1433        socket: Front,
1434        position: super::Position,
1435        pool: std::rc::Weak<std::cell::RefCell<crate::pool::Pool>>,
1436        flood_config: H2FloodConfig,
1437        connection_config: H2ConnectionConfig,
1438        stream_idle_timeout: std::time::Duration,
1439        graceful_shutdown_deadline: Option<std::time::Duration>,
1440        timeout_container: crate::timer::TimeoutContainer,
1441        expect_read: Option<(H2StreamId, usize)>,
1442        readiness_interest: sozu_command::ready::Ready,
1443    ) -> Option<Self> {
1444        let buffer = pool
1445            .upgrade()
1446            .and_then(|pool| pool.borrow_mut().checkout())?;
1447        let local_settings = H2Settings {
1448            settings_max_concurrent_streams: connection_config.max_concurrent_streams,
1449            ..H2Settings::default()
1450        };
1451        let mut decoder = loona_hpack::Decoder::new();
1452        // RFC 7541 §4.2: enforce SETTINGS_HEADER_TABLE_SIZE as the upper bound
1453        // for dynamic table size updates from the peer
1454        decoder.set_max_allowed_table_size(local_settings.settings_header_table_size as usize);
1455        Some(ConnectionH2 {
1456            session_ulid,
1457            decoder,
1458            encoder: loona_hpack::Encoder::new(),
1459            expect_read,
1460            expect_write: None,
1461            last_stream_id: 0,
1462            local_settings,
1463            peer_settings: H2Settings::default(),
1464            position,
1465            prioriser: Prioriser::default(),
1466            readiness: crate::Readiness {
1467                interest: readiness_interest,
1468                event: Ready::EMPTY,
1469            },
1470            socket,
1471            state: H2State::ClientPreface,
1472            streams: std::collections::HashMap::with_capacity(8),
1473            timeout_container,
1474            flow_control: H2FlowControl {
1475                window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
1476                received_bytes_since_update: 0,
1477                pending_window_updates: HashMap::new(),
1478            },
1479            highest_peer_stream_id: 0,
1480            pending_table_size_update: None,
1481            converter_buf: Vec::new(),
1482            lowercase_buf: Vec::new(),
1483            cookie_buf: Vec::new(),
1484            drain: H2DrainState {
1485                draining: false,
1486                peer_last_stream_id: None,
1487                started_at: None,
1488                graceful_shutdown_deadline,
1489            },
1490            zero: kawa::Kawa::new(kawa::Kind::Request, kawa::Buffer::new(buffer)),
1491            bytes: H2ByteAccounting {
1492                zero_bytes_read: 0,
1493                overhead_bin: 0,
1494                overhead_bout: 0,
1495            },
1496            flood_detector: H2FloodDetector::new(flood_config),
1497            settings_sent_at: None,
1498            pending_rst_streams: Vec::new(),
1499            rst_sent: std::collections::HashSet::new(),
1500            total_rst_streams_queued: 0,
1501            priorities_buf: Vec::new(),
1502            close_notify_sent: false,
1503            max_pending_window_updates: 1 + connection_config.max_concurrent_streams as usize * 4,
1504            connection_config,
1505            last_gauge_snapshot: None,
1506            stream_last_activity_at: HashMap::new(),
1507            stream_idle_timeout,
1508            refuse_count_window: 0,
1509            refuse_window_start: Instant::now(),
1510            mcs_backpressure_applied: false,
1511        })
1512    }
1513
1514    /// Start TLS close_notify on the frontend and keep the session alive until
1515    /// rustls has flushed the generated records.
1516    pub fn initiate_close_notify(&mut self) -> bool {
1517        if !self.position.is_server()
1518            || matches!(
1519                self.state,
1520                H2State::ClientPreface | H2State::ClientSettings | H2State::ServerSettings
1521            )
1522        {
1523            return false;
1524        }
1525        if !self.close_notify_sent {
1526            trace!("{} H2 initiating CLOSE_NOTIFY", log_context!(self));
1527            self.socket.socket_close();
1528            self.close_notify_sent = true;
1529        }
1530        if self.socket.socket_wants_write() {
1531            self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
1532            self.ensure_tls_flushed();
1533            true
1534        } else {
1535            false
1536        }
1537    }
1538
1539    fn expect_header(&mut self) {
1540        self.state = H2State::Header;
1541        self.expect_read = Some((H2StreamId::Zero, 9));
1542    }
1543
1544    /// Process the `H2State::Header` state: parse a 9-byte frame header from
1545    /// `self.zero`, validate the stream, create new streams if needed, and
1546    /// transition to `H2State::Frame` for the payload.
1547    ///
1548    /// Returns `MuxResult` — the caller should propagate the result directly.
1549    fn handle_header_state<L>(&mut self, context: &mut Context<L>) -> MuxResult
1550    where
1551        L: ListenerHandler + L7ListenerHandler,
1552    {
1553        let i = self.zero.storage.data();
1554        trace!("{}   header: {:?}", log_context!(self), i);
1555        match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
1556            Ok((_, header)) => {
1557                trace!("{} {:#?}", log_context!(self), header);
1558                self.zero.storage.clear();
1559                let stream_id = header.stream_id;
1560                // RFC 9113 §6.10: CONTINUATION frames MUST be preceded by a
1561                // HEADERS or PUSH_PROMISE frame without END_HEADERS. When we
1562                // reach `handle_header_state`, we are between frames and no
1563                // header block is in progress (otherwise the state would be
1564                // `H2State::ContinuationHeader`). A CONTINUATION frame arriving
1565                // here is therefore standalone and MUST be treated as a
1566                // connection error of type PROTOCOL_ERROR.
1567                if header.frame_type == FrameType::Continuation {
1568                    error!(
1569                        "{} standalone CONTINUATION frame on stream {} without preceding HEADERS",
1570                        log_context!(self),
1571                        stream_id
1572                    );
1573                    return self.goaway(H2Error::ProtocolError);
1574                }
1575                // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
1576                // Route unknown frames (and any stream_id == 0 control frame)
1577                // through stream 0 (the connection-level buffer) so
1578                // `handle_frame` can drop them without touching stream state.
1579                let read_stream = if stream_id == 0
1580                    || matches!(header.frame_type, FrameType::Unknown(_))
1581                {
1582                    H2StreamId::Zero
1583                } else if let Some(global_stream_id) = self.streams.get(&stream_id) {
1584                    let allowed_on_half_closed = header.frame_type == FrameType::WindowUpdate
1585                        || header.frame_type == FrameType::Priority
1586                        || header.frame_type == FrameType::RstStream;
1587                    let stream = &context.streams[*global_stream_id];
1588                    // Use the position-aware end_of_stream flag:
1589                    // - Server reads from front (client requests)
1590                    // - Client reads from back (backend responses)
1591                    let received_eos = if self.position.is_server() {
1592                        stream.front_received_end_of_stream
1593                    } else {
1594                        stream.back_received_end_of_stream
1595                    };
1596                    trace!(
1597                        "{} REQUESTING EXISTING STREAM {}: {}/{:?}",
1598                        log_context!(self),
1599                        stream_id,
1600                        received_eos,
1601                        stream.state
1602                    );
1603                    if !allowed_on_half_closed && (received_eos || !stream.state.is_open()) {
1604                        error!(
1605                            "{} CANNOT RECEIVE {:?} ON THIS STREAM {:?}",
1606                            log_context!(self),
1607                            header.frame_type,
1608                            stream.state
1609                        );
1610                        return self.goaway(H2Error::StreamClosed);
1611                    }
1612                    // RFC 9113 §8.1: a HEADERS frame received in the body
1613                    // phase is a trailer block and MUST carry END_STREAM. This
1614                    // closes the request-smuggling primitive where a peer sends
1615                    // HEADERS, DATA, HEADERS (no END_STREAM) to chain header
1616                    // blocks on the same stream ID.
1617                    //
1618                    // Discriminate from the read-side Kawa parsing phase rather
1619                    // than stream existence: on Position::Client the stream is
1620                    // created when we send the request to the backend, so the
1621                    // initial backend response HEADERS legitimately arrives on
1622                    // an existing stream. Similarly, 1xx→final transitions on
1623                    // either side may yield multiple HEADERS frames before the
1624                    // body begins (kawa clears back to initial / terminated on
1625                    // 1xx; neither is main_phase). Only HEADERS arriving once
1626                    // the read side has transitioned to Body/Chunks parsing —
1627                    // i.e. after headers were fully consumed and body framing
1628                    // is in progress — may be a trailer.
1629                    let read_in_body = if self.position.is_server() {
1630                        stream.front.is_main_phase()
1631                    } else {
1632                        stream.back.is_main_phase()
1633                    };
1634                    if header.frame_type == FrameType::Headers
1635                        && read_in_body
1636                        && header.flags & parser::FLAG_END_STREAM == 0
1637                    {
1638                        error!(
1639                            "{} HEADERS without END_STREAM on open stream {} in body phase: trailers MUST carry END_STREAM",
1640                            log_context!(self),
1641                            stream_id
1642                        );
1643                        return self.goaway(H2Error::ProtocolError);
1644                    }
1645                    if header.frame_type == FrameType::Data {
1646                        H2StreamId::Other {
1647                            id: stream_id,
1648                            gid: *global_stream_id,
1649                        }
1650                    } else {
1651                        H2StreamId::Zero
1652                    }
1653                } else {
1654                    // RFC 9113 §5.1.1: stream identifiers MUST be strictly
1655                    // increasing. Tightened from `>=` to `>` so that a peer
1656                    // cannot re-use `self.last_stream_id` (which would
1657                    // conflict with our own server-pushed streams if we
1658                    // ever enable push in the future). For the first
1659                    // request on a fresh connection `last_stream_id == 0`
1660                    // and any client-initiated odd stream still passes.
1661                    if header.frame_type == FrameType::Headers
1662                        && self.position.is_server()
1663                        && stream_id & 1 == 1
1664                        && stream_id > self.last_stream_id
1665                    {
1666                        // RFC 9113 §6.8: after sending a GOAWAY, the proxy
1667                        // MUST NOT accept new streams.
1668                        // `graceful_goaway` sets `drain.draining = true`
1669                        // and sends an initial GOAWAY with last_stream_id =
1670                        // STREAM_ID_MAX (so in-flight requests are still
1671                        // accepted), but the contract for *new* peer-
1672                        // initiated streams is that they must be refused.
1673                        // Without this check, a peer racing the drain
1674                        // window could open arbitrary new streams between
1675                        // the initial and final GOAWAY emission.
1676                        if self.drain.draining {
1677                            if stream_id > self.highest_peer_stream_id {
1678                                self.highest_peer_stream_id = stream_id;
1679                            }
1680                            return self.refuse_stream_and_discard(
1681                                stream_id,
1682                                H2Error::RefusedStream,
1683                                header.payload_len,
1684                            );
1685                        }
1686                        if self.streams.len()
1687                            >= self.local_settings.settings_max_concurrent_streams as usize
1688                        {
1689                            error!(
1690                                "{} MAX CONCURRENT STREAMS: limit={}, current={}",
1691                                log_context!(self),
1692                                self.local_settings.settings_max_concurrent_streams,
1693                                self.streams.len()
1694                            );
1695                            // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
1696                            // queueing RST_STREAM so GOAWAY reports the correct
1697                            // last_stream_id if the connection closes later.
1698                            if stream_id > self.highest_peer_stream_id {
1699                                self.highest_peer_stream_id = stream_id;
1700                            }
1701                            return self.refuse_stream_and_discard(
1702                                stream_id,
1703                                H2Error::RefusedStream,
1704                                header.payload_len,
1705                            );
1706                        }
1707                        match self.create_stream(stream_id, context) {
1708                            Some(_) => {}
1709                            None => {
1710                                // Buffer pool exhaustion is transient — refuse
1711                                // this stream but keep the connection alive so
1712                                // existing streams can complete and free buffers.
1713                                error!(
1714                                    "{} Could not create stream {}: buffer pool exhausted",
1715                                    log_context!(self),
1716                                    stream_id
1717                                );
1718                                // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
1719                                // queueing RST_STREAM so GOAWAY reports the correct
1720                                // last_stream_id if the connection closes later.
1721                                if stream_id > self.highest_peer_stream_id {
1722                                    self.highest_peer_stream_id = stream_id;
1723                                }
1724                                return self.refuse_stream_and_discard(
1725                                    stream_id,
1726                                    H2Error::RefusedStream,
1727                                    header.payload_len,
1728                                );
1729                            }
1730                        }
1731                    } else if header.frame_type != FrameType::Priority {
1732                        // Distinguish closed vs idle: check whether the stream
1733                        // was previously opened. For Server position, compare
1734                        // against highest_peer_stream_id (client-initiated).
1735                        // For Client position, compare against last_stream_id
1736                        // (our own initiated streams) since the peer never
1737                        // initiates streams on a backend connection.
1738                        let is_closed_stream = if self.position.is_server() {
1739                            header.stream_id <= self.highest_peer_stream_id
1740                        } else {
1741                            header.stream_id < self.last_stream_id
1742                        };
1743                        if is_closed_stream {
1744                            match header.frame_type {
1745                                FrameType::RstStream | FrameType::WindowUpdate => {
1746                                    // RFC 9113 §5.1: RST_STREAM and WINDOW_UPDATE
1747                                    // on a closed stream can arrive due to race
1748                                    // conditions and should be consumed/discarded.
1749                                    debug!(
1750                                        "{} Ignoring {:?} on closed stream {}",
1751                                        log_context!(self),
1752                                        header.frame_type,
1753                                        header.stream_id
1754                                    );
1755                                    self.flood_detector.glitch_count += 1;
1756                                    check_flood_or_return!(self);
1757                                }
1758                                FrameType::Data => {
1759                                    // RFC 9113 §5.1: DATA on a closed stream is a
1760                                    // stream error of type STREAM_CLOSED. Queue
1761                                    // RST_STREAM (not GOAWAY) to preserve the
1762                                    // connection for other streams. The payload is
1763                                    // still routed through stream 0 so handle_frame
1764                                    // can do connection-level flow control accounting.
1765                                    debug!(
1766                                        "{} DATA on closed stream {}, sending RST_STREAM(STREAM_CLOSED)",
1767                                        log_context!(self),
1768                                        header.stream_id
1769                                    );
1770                                    self.flood_detector.glitch_count += 1;
1771                                    check_flood_or_return!(self);
1772                                    if let Some(result) =
1773                                        self.enqueue_rst(header.stream_id, H2Error::StreamClosed)
1774                                    {
1775                                        return result;
1776                                    }
1777                                }
1778                                _ => {
1779                                    // RFC 9113 §5.1: HEADERS or other frames on a
1780                                    // closed stream → connection error STREAM_CLOSED.
1781                                    error!(
1782                                        "{} Received {:?} on closed stream {}, sending GOAWAY(STREAM_CLOSED)",
1783                                        log_context!(self),
1784                                        header.frame_type,
1785                                        header.stream_id
1786                                    );
1787                                    return self.goaway(H2Error::StreamClosed);
1788                                }
1789                            }
1790                        } else {
1791                            error!(
1792                                "{} Received {:?} on idle stream {}, sending GOAWAY(PROTOCOL_ERROR)",
1793                                log_context!(self),
1794                                header.frame_type,
1795                                header.stream_id
1796                            );
1797                            return self.goaway(H2Error::ProtocolError);
1798                        }
1799                    }
1800                    H2StreamId::Zero
1801                };
1802                trace!(
1803                    "{} {} {:?} {:#?}",
1804                    log_context!(self),
1805                    header.stream_id,
1806                    stream_id,
1807                    self.streams
1808                );
1809                self.expect_read = Some((read_stream, header.payload_len as usize));
1810                self.state = H2State::Frame(header);
1811            }
1812            Err(error) => {
1813                let error = error_nom_to_h2(error);
1814                error!("{} COULD NOT PARSE FRAME HEADER", log_context!(self));
1815                return self.goaway(error);
1816            }
1817        };
1818        MuxResult::Continue
1819    }
1820
1821    /// Process the `H2State::ContinuationHeader` state: parse a CONTINUATION
1822    /// frame header from `self.zero`, validate stream ID continuity, track
1823    /// flood detection counters, and transition to `ContinuationFrame`.
1824    ///
1825    /// The `headers` parameter is the accumulated HEADERS context from the
1826    /// initial HEADERS frame (cloned from the state enum to avoid borrow
1827    /// conflicts).
1828    fn handle_continuation_header_state(&mut self, headers: &Headers) -> MuxResult {
1829        let i = self.zero.storage.unparsed_data();
1830        trace!("{}   continuation header: {:?}", log_context!(self), i);
1831        match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
1832            Ok((
1833                _,
1834                FrameHeader {
1835                    payload_len,
1836                    frame_type: FrameType::Continuation,
1837                    flags,
1838                    stream_id,
1839                },
1840            )) => {
1841                if self.zero.storage.end < 9 {
1842                    error!(
1843                        "{} CONTINUATION header: storage.end ({}) too small to remove frame header",
1844                        log_context!(self),
1845                        self.zero.storage.end
1846                    );
1847                    return self.goaway(H2Error::InternalError);
1848                }
1849                self.zero.storage.end -= 9;
1850                if stream_id != headers.stream_id {
1851                    error!(
1852                        "{} CONTINUATION stream_id {} does not match HEADERS stream_id {}",
1853                        log_context!(self),
1854                        stream_id,
1855                        headers.stream_id
1856                    );
1857                    return self.goaway(H2Error::ProtocolError);
1858                }
1859                // CVE-2024-27316: track CONTINUATION frame count and accumulated size
1860                self.flood_detector.continuation_count += 1;
1861                self.flood_detector.accumulated_header_size = self
1862                    .flood_detector
1863                    .accumulated_header_size
1864                    .saturating_add(payload_len);
1865                check_flood_or_return!(self);
1866                // RFC 9113 §10.5.1: reject header blocks that cannot be
1867                // buffered. Previously we silently removed READABLE interest
1868                // when amount > available_space, stalling the connection.
1869                // If the payload still fits in our zero buffer we can refuse
1870                // just this stream (RST_STREAM + drain); if not, the
1871                // connection can no longer decode header blocks safely and we
1872                // escalate to GOAWAY(EnhanceYourCalm).
1873                if self.flood_detector.accumulated_header_size
1874                    > self.flood_detector.config.max_header_list_size
1875                {
1876                    error!(
1877                        "{} CONTINUATION accumulated header size {} exceeds {}",
1878                        log_context!(self),
1879                        self.flood_detector.accumulated_header_size,
1880                        self.flood_detector.config.max_header_list_size
1881                    );
1882                    if (payload_len as usize) > self.zero.storage.available_space() {
1883                        return self.goaway(H2Error::EnhanceYourCalm);
1884                    }
1885                    // Remove the already-created stream slot before refusing,
1886                    // so it does not leak against MAX_CONCURRENT_STREAMS. Route
1887                    // through `remove_dead_stream` so the expect_write/read
1888                    // invariant (§LIFECYCLE.md 5.4) holds on this path too.
1889                    if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
1890                        self.remove_dead_stream(stream_id, global_stream_id);
1891                    }
1892                    return self.refuse_stream_and_discard(
1893                        stream_id,
1894                        H2Error::RefusedStream,
1895                        payload_len,
1896                    );
1897                }
1898                if (payload_len as usize) > self.zero.storage.available_space() {
1899                    error!(
1900                        "{} CONTINUATION payload {} exceeds buffer space {}",
1901                        log_context!(self),
1902                        payload_len,
1903                        self.zero.storage.available_space()
1904                    );
1905                    return self.goaway(H2Error::EnhanceYourCalm);
1906                }
1907                self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
1908                let mut headers = headers.clone();
1909                headers.end_headers = flags & parser::FLAG_END_HEADERS != 0;
1910                headers.header_block_fragment.len = headers
1911                    .header_block_fragment
1912                    .len
1913                    .saturating_add(payload_len);
1914                self.state = H2State::ContinuationFrame(headers);
1915            }
1916            Err(error) => {
1917                let error = error_nom_to_h2(error);
1918                error!("{} COULD NOT PARSE CONTINUATION HEADER", log_context!(self));
1919                return self.goaway(error);
1920            }
1921            other => {
1922                error!(
1923                    "{} UNEXPECTED {:?} WHILE PARSING CONTINUATION HEADER",
1924                    log_context!(self),
1925                    other
1926                );
1927                return self.goaway(H2Error::ProtocolError);
1928            }
1929        };
1930        MuxResult::Continue
1931    }
1932
1933    pub fn readable<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
1934    where
1935        E: Endpoint,
1936        L: ListenerHandler + L7ListenerHandler,
1937    {
1938        self.prune_inactive_streams_while_closing(context);
1939        // Pass 4 Medium #3: per-stream idle guard. Slow-multiplex Slowloris
1940        // sends one byte or a control frame per stream just often enough to
1941        // reset the connection-level timer; per-stream deadlines catch it.
1942        self.cancel_timed_out_streams(context, &mut endpoint);
1943
1944        // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
1945        if let Some(sent_at) = self.settings_sent_at {
1946            if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
1947                warn!(
1948                    "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
1949                    log_context!(self),
1950                    SETTINGS_ACK_TIMEOUT
1951                );
1952                return self.goaway(H2Error::SettingsTimeout);
1953            }
1954        }
1955
1956        // Don't reset the timeout unconditionally here. Only application data
1957        // (DATA/HEADERS frames) should reset the timeout. H2 control frames
1958        // (PING, WINDOW_UPDATE, SETTINGS) must NOT reset it, otherwise a peer
1959        // sending periodic PINGs prevents timeout detection on stuck sessions.
1960        // The timeout is reset:
1961        // - Below, when reading DATA payload (H2StreamId::Other)
1962        // - In handle_frame(), when processing HEADERS frames
1963        let (stream_id, kawa) = if let Some((stream_id, amount)) = self.expect_read {
1964            let (kawa, did) = match stream_id {
1965                H2StreamId::Zero => (&mut self.zero, usize::MAX),
1966                H2StreamId::Other {
1967                    gid: global_stream_id,
1968                    ..
1969                } => {
1970                    // Reading DATA frame payload for an application stream.
1971                    // This is real application activity — reset the timeout.
1972                    self.timeout_container.reset();
1973                    (
1974                        context.streams[global_stream_id]
1975                            .split(&self.position)
1976                            .rbuffer,
1977                        global_stream_id,
1978                    )
1979                }
1980            };
1981            trace!(
1982                "{} {:?}({:?}, {})",
1983                log_context!(self),
1984                self.state,
1985                stream_id,
1986                amount
1987            );
1988            if amount > 0 {
1989                if amount > kawa.storage.available_space() {
1990                    self.readiness.interest.remove(Ready::READABLE);
1991                    return MuxResult::Continue;
1992                }
1993                let (size, status) = self.socket.socket_read(&mut kawa.storage.space()[..amount]);
1994                context.debug.push(DebugEvent::SocketIO(0, did, size));
1995                kawa.storage.fill(size);
1996                self.position.count_bytes_in_counter(size);
1997                self.bytes.zero_bytes_read += size;
1998                if update_readiness_after_read(size, status, &mut self.readiness) {
1999                    if matches!(self.position, Position::Server)
2000                        && self.drain.draining
2001                        && matches!(status, SocketResult::Closed | SocketResult::Error)
2002                    {
2003                        // During graceful drain, a frontend EOF/HUP means no
2004                        // further frame headers or payload bytes can arrive.
2005                        // Keeping expect_read here strands the connection in
2006                        // Header/Frame forever even after the peer is gone.
2007                        self.expect_read = None;
2008                    }
2009                    return MuxResult::Continue;
2010                } else if size == amount {
2011                    self.expect_read = None;
2012                } else {
2013                    self.expect_read = Some((stream_id, amount - size));
2014                    if let (H2State::ClientPreface, Position::Server) =
2015                        (&self.state, &self.position)
2016                    {
2017                        let i = kawa.storage.data();
2018                        if !b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n".starts_with(i) {
2019                            debug!("{} EARLY INVALID PREFACE: {:?}", log_context!(self), i);
2020                            return self.force_disconnect();
2021                        }
2022                    }
2023                    return MuxResult::Continue;
2024                }
2025            } else {
2026                self.expect_read = None;
2027            }
2028            (stream_id, kawa)
2029        } else {
2030            self.readiness.event.remove(Ready::READABLE);
2031            return MuxResult::Continue;
2032        };
2033        match (&self.state, &self.position) {
2034            (H2State::Error, _)
2035            | (H2State::GoAway, _)
2036            | (H2State::ServerSettings, Position::Server)
2037            | (H2State::ClientPreface, Position::Client(..))
2038            | (H2State::ClientSettings, Position::Client(..)) => {
2039                error!(
2040                    "{} Unexpected combination: (Readable, {:?}, {:?})",
2041                    log_context!(self),
2042                    self.state,
2043                    self.position
2044                );
2045                return self.force_disconnect();
2046            }
2047            (H2State::Discard, _) => {
2048                let _i = kawa.storage.data();
2049                trace!("{} DISCARDING: {:?}", log_context!(self), _i);
2050                kawa.storage.clear();
2051                self.attribute_bytes_to_overhead();
2052                self.expect_header();
2053            }
2054            (H2State::ClientPreface, Position::Server) => {
2055                let i = kawa.storage.data();
2056                let i = match parser::preface(i) {
2057                    Ok((i, _)) => i,
2058                    Err(_) => return self.force_disconnect(),
2059                };
2060                match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2061                    Ok((
2062                        _,
2063                        FrameHeader {
2064                            payload_len,
2065                            frame_type: FrameType::Settings,
2066                            flags: 0,
2067                            stream_id: 0,
2068                        },
2069                    )) => {
2070                        kawa.storage.clear();
2071                        self.state = H2State::ClientSettings;
2072                        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2073                    }
2074                    _ => return self.force_disconnect(),
2075                };
2076            }
2077            (H2State::ClientSettings, Position::Server) => {
2078                let i = kawa.storage.data();
2079                let settings = match parser::settings_frame(
2080                    i,
2081                    &FrameHeader {
2082                        payload_len: i.len() as u32,
2083                        frame_type: FrameType::Settings,
2084                        flags: 0,
2085                        stream_id: 0,
2086                    },
2087                ) {
2088                    Ok((_, settings)) => {
2089                        kawa.storage.clear();
2090                        settings
2091                    }
2092                    Err(_) => return self.force_disconnect(),
2093                };
2094                let kawa = &mut self.zero;
2095                match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
2096                    Ok((_, size)) => {
2097                        kawa.storage.fill(size);
2098                        incr!(names::h2::FRAMES_TX_SETTINGS);
2099                        // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
2100                        self.settings_sent_at = Some(Instant::now());
2101                    }
2102                    Err(error) => {
2103                        error!(
2104                            "{} Could not serialize SettingsFrame: {:?}",
2105                            log_context!(self),
2106                            error
2107                        );
2108                        return self.force_disconnect();
2109                    }
2110                };
2111
2112                self.state = H2State::ServerSettings;
2113                self.expect_write = Some(H2StreamId::Zero);
2114                self.readiness.signal_pending_write();
2115                return self.handle_frame(settings, 0, context, endpoint);
2116            }
2117            (H2State::ServerSettings, Position::Client(..)) => {
2118                let i = kawa.storage.data();
2119                match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2120                    Ok((
2121                        _,
2122                        header @ FrameHeader {
2123                            payload_len,
2124                            frame_type: FrameType::Settings,
2125                            flags: 0,
2126                            stream_id: 0,
2127                        },
2128                    )) => {
2129                        kawa.storage.clear();
2130                        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2131                        self.state = H2State::Frame(header)
2132                    }
2133                    _ => return self.force_disconnect(),
2134                };
2135            }
2136            (H2State::Header, _) => {
2137                return self.handle_header_state(context);
2138            }
2139            (H2State::ContinuationHeader(headers), _) => {
2140                let headers = headers.clone();
2141                return self.handle_continuation_header_state(&headers);
2142            }
2143            (H2State::Frame(header), _) => {
2144                let i = kawa.storage.unparsed_data();
2145                trace!("{}   data: {:?}", log_context!(self), i);
2146                let wire_payload_len = header.payload_len;
2147                let frame = match parser::frame_body(i, header) {
2148                    Ok((_, frame)) => frame,
2149                    Err(error) => {
2150                        let error = error_nom_to_h2(error);
2151                        error!("{} COULD NOT PARSE FRAME BODY", log_context!(self));
2152                        return self.goaway(error);
2153                    }
2154                };
2155                if let H2StreamId::Zero = stream_id {
2156                    if header.frame_type == FrameType::Headers {
2157                        kawa.storage.head = kawa.storage.end;
2158                    } else {
2159                        kawa.storage.end = kawa.storage.head;
2160                    }
2161                }
2162                self.expect_header();
2163                return self.handle_frame(frame, wire_payload_len, context, endpoint);
2164            }
2165            (H2State::ContinuationFrame(headers), _) => {
2166                kawa.storage.head = kawa.storage.end;
2167                let i = kawa.storage.data();
2168                trace!("{}   data: {:?}", log_context!(self), i);
2169                let headers = headers.clone();
2170                self.expect_header();
2171                return self.handle_frame(Frame::Headers(headers), 0, context, endpoint);
2172            }
2173        }
2174        MuxResult::Continue
2175    }
2176
2177    /// Update the H2 connection-level *aggregate* gauges with this connection's
2178    /// current contribution, expressed as a signed delta against the last
2179    /// snapshot we emitted.
2180    ///
2181    /// The three metrics are emitted via [`gauge_add!`] (lifecycle deltas) so
2182    /// that the dashboard sees the **sum across all live H2 connections**:
2183    ///
2184    /// - `h2.connection.window_bytes` — sum of available connection-level
2185    ///   send-window bytes. Negative per-connection windows clamp to 0 so the
2186    ///   aggregate represents only available capacity, not deficit.
2187    /// - `h2.connection.active_streams` — sum of in-flight streams across
2188    ///   every H2 connection.
2189    /// - `h2.connection.pending_window_updates` — sum of queued (un-flushed)
2190    ///   per-stream WINDOW_UPDATE entries across every H2 connection.
2191    ///
2192    /// Called from the write hot path; emits nothing when the snapshot is
2193    /// unchanged so the steady state stays cheap. The paired decrement for
2194    /// every increment is provided by [`Drop`], which subtracts the final
2195    /// snapshot when the connection is dropped — keeping the aggregate
2196    /// arithmetically symmetric independent of which close path runs
2197    /// (`graceful_goaway`, `force_disconnect`, `handle_goaway_frame`,
2198    /// `Mux::close`, panic-unwind, …).
2199    fn gauge_connection_state(&mut self) {
2200        let snapshot = (
2201            self.flow_control.window.max(0) as usize,
2202            self.streams.len(),
2203            self.flow_control.pending_window_updates.len(),
2204        );
2205        if self.last_gauge_snapshot == Some(snapshot) {
2206            return;
2207        }
2208        let prev = self.last_gauge_snapshot.unwrap_or((0, 0, 0));
2209        // Diff in i64 — usize cannot represent the negative side of the delta.
2210        let dw = snapshot.0 as i64 - prev.0 as i64;
2211        let ds = snapshot.1 as i64 - prev.1 as i64;
2212        let du = snapshot.2 as i64 - prev.2 as i64;
2213        if dw != 0 {
2214            gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, dw);
2215        }
2216        if ds != 0 {
2217            gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, ds);
2218        }
2219        if du != 0 {
2220            gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, du);
2221        }
2222        self.last_gauge_snapshot = Some(snapshot);
2223    }
2224
2225    /// Subtract this connection's contribution from the three aggregate
2226    /// `h2.connection.*` gauges. Idempotent: clears `last_gauge_snapshot` so a
2227    /// second call (or a [`Drop`] on top of an explicit reset) is a no-op.
2228    ///
2229    /// Pairs with every prior call to [`Self::gauge_connection_state`]; called
2230    /// from [`Drop`] so the symmetry is guaranteed regardless of the close
2231    /// path.
2232    fn release_connection_gauges(&mut self) {
2233        if let Some((w, s, u)) = self.last_gauge_snapshot.take() {
2234            if w != 0 {
2235                gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, -(w as i64));
2236            }
2237            if s != 0 {
2238                gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, -(s as i64));
2239            }
2240            if u != 0 {
2241                gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, -(u as i64));
2242            }
2243        }
2244    }
2245
2246    /// Write application data (request/response bodies, headers) across all
2247    /// active streams, respecting priority ordering and flow control.
2248    ///
2249    /// This is the main data-plane write path: it resumes any partially-written
2250    /// stream, prepares new frames via the H2 block converter, flushes them to
2251    /// the socket, and recycles completed streams.
2252    ///
2253    /// NOTE: The priority iteration loop and converter setup remain inline here
2254    /// because the converter borrows `self.encoder`, preventing further
2255    /// decomposition into `&mut self` methods within the loop body.
2256    fn write_streams<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2257    where
2258        E: Endpoint,
2259        L: ListenerHandler + L7ListenerHandler,
2260    {
2261        self.timeout_container.reset();
2262        // Pre-compute byte totals for proportional overhead distribution.
2263        let byte_totals = self.compute_stream_byte_totals(context);
2264        let mut io_slices: Vec<IoSlice<'static>> = Vec::new();
2265
2266        if let Some(
2267            write_stream @ H2StreamId::Other {
2268                id: stream_id,
2269                gid: global_stream_id,
2270            },
2271        ) = self.expect_write
2272        {
2273            let stream = &mut context.streams[global_stream_id];
2274            let stream_state = stream.state;
2275            let parts = stream.split(&self.position);
2276            let kawa = parts.wbuffer;
2277            // Resume path: if the same stream is parked waiting for buffer
2278            // space (expect_read matches write_stream), pass the amount so
2279            // flush_stream_out can re-enable READABLE as soon as we drain.
2280            let cross_read_amount = match self.expect_read {
2281                Some((read_stream, amount)) if write_stream == read_stream => Some(amount),
2282                _ => None,
2283            };
2284            let mut resume_bytes: usize = 0;
2285            let outcome = Self::flush_stream_out(
2286                &mut self.socket,
2287                kawa,
2288                parts.metrics,
2289                &self.position,
2290                &mut self.readiness,
2291                &mut context.debug,
2292                2,
2293                global_stream_id,
2294                None,
2295                cross_read_amount,
2296                &mut io_slices,
2297                Some(&mut resume_bytes),
2298            );
2299            // Refresh the per-stream idle timer when outbound bytes move: a
2300            // large response delivered at low bandwidth is "active", not idle,
2301            // even when the peer sends no inbound frames.
2302            if resume_bytes > 0 {
2303                if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2304                    *t = Instant::now();
2305                }
2306            }
2307            if outcome == FlushOutcome::Stalled {
2308                return MuxResult::Continue;
2309            }
2310            self.expect_write = None;
2311            if (kawa.is_terminated() || kawa.is_error())
2312                && kawa.is_completed()
2313                && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2314            {
2315                let (client_rtt, server_rtt) = Self::snapshot_rtts(
2316                    &self.position,
2317                    &self.socket,
2318                    &endpoint,
2319                    stream.linked_token(),
2320                );
2321
2322                if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2323                    &self.position,
2324                    &mut self.bytes,
2325                    &self.streams,
2326                    stream,
2327                    global_stream_id,
2328                    stream_id,
2329                    byte_totals,
2330                    &mut context.debug,
2331                    context.listener.clone(),
2332                    client_rtt,
2333                    server_rtt,
2334                ) {
2335                    // Remove the recycled stream from the connection maps
2336                    // before endpoint.end_stream() can trigger teardown.
2337                    // Otherwise session close can observe a stale `Recycle`
2338                    // entry in self.streams and mis-handle the connection as
2339                    // if it still had an active H2 stream.
2340                    self.remove_dead_stream(dead_id, global_stream_id);
2341                    if let Some(token) = token {
2342                        remove_backend_stream(
2343                            &mut context.backend_streams,
2344                            token,
2345                            global_stream_id,
2346                        );
2347                        endpoint.end_stream(token, global_stream_id, context);
2348                    }
2349                }
2350            }
2351        }
2352
2353        self.gauge_connection_state();
2354
2355        let scheme: &'static [u8] = if context.listener.borrow().protocol() == Protocol::HTTPS {
2356            b"https"
2357        } else {
2358            b"http"
2359        };
2360        let mut completed_streams = Vec::new();
2361        let mut converter_buf = std::mem::take(&mut self.converter_buf);
2362        converter_buf.clear();
2363        let mut converter = converter::H2BlockConverter {
2364            max_frame_size: self.peer_settings.settings_max_frame_size as usize,
2365            window: 0,
2366            stream_id: 0,
2367            encoder: &mut self.encoder,
2368            out: converter_buf,
2369            scheme,
2370            lowercase_buf: std::mem::take(&mut self.lowercase_buf),
2371            cookie_buf: std::mem::take(&mut self.cookie_buf),
2372            // When this connection is a backend client we are writing
2373            // toward the upstream backend — flow-control stalls in that
2374            // direction are scoped to `backend.flow_control.paused` (in
2375            // addition to the existing direction-agnostic
2376            // `h2.flow_control_stall`).
2377            position_is_client: self.position.is_client(),
2378            // RFC 9218 §4: toggled per-stream in the loop below, driven by
2379            // `Prioriser::get(stream_id).1`. Non-incremental by default so
2380            // unit tests and non-scheduled callers (e.g. the resume path
2381            // above) keep the sequential semantics.
2382            incremental_mode: false,
2383            // Populated once per write pass from `apply_incremental_rotation`
2384            // below. The converter uses `incremental_peer_count <= 1` to skip
2385            // the RFC 9218 yield-after-one-DATA behaviour when there is no
2386            // peer to interleave with (solo-bucket fast path).
2387            incremental_peer_count: 0,
2388            // RFC 7541 §6.3: move the pending size-update onto the converter
2389            // so the first header block of this pass prepends the signal.
2390            // We clear the connection-side mirror only AFTER the write pass
2391            // confirms emission via `converter.size_update_emitted`, so a
2392            // DATA-only write pass (no header block) does not drop the
2393            // signal.
2394            pending_table_size_update: self.pending_table_size_update,
2395            size_update_emitted: false,
2396            // Reset on every write pass; `check_header_capacity` flips it
2397            // mid-call and `finalize` commits the abort by flipping
2398            // `kawa.parsing_phase` to Error so the next pass emits
2399            // RST_STREAM(InternalError).
2400            pending_oversized_abort: false,
2401        };
2402        self.priorities_buf.clear();
2403        self.priorities_buf.extend(self.streams.keys().copied());
2404        // RFC 9218 §4 primary sort: ascending urgency, then stream ID for
2405        // stability. The incremental flag is handled by
2406        // `apply_incremental_rotation` below so it does not perturb the
2407        // non-incremental fast path.
2408        self.priorities_buf.sort_by_cached_key(|id| {
2409            let (urgency, _) = self.prioriser.get(id);
2410            (urgency, *id)
2411        });
2412        // RFC 9218 §4: inside each urgency bucket, move incremental streams
2413        // to the tail and rotate them by the per-connection round-robin
2414        // cursor so no single slow-draining stream can starve its
2415        // same-urgency incremental peers.
2416        let incremental_count = self
2417            .prioriser
2418            .apply_incremental_rotation(&mut self.priorities_buf);
2419
2420        // RFC 9218 §4 refinement (Tier 3a): the connection-global
2421        // `incremental_count` is too coarse for `converter.incremental_peer_count`.
2422        // A solo `u=0, i` stream with an unrelated `u=7, i` peer in a
2423        // different urgency bucket would still see `incremental_peer_count > 1`
2424        // and voluntarily yield — stranding bytes the invariant-15/16 guards
2425        // were meant to prevent. Scope the count to same-urgency streams that
2426        // are actually ready to emit this pass (eligibility mirrors the check
2427        // in the write loop below).
2428        let mut ready_incremental_by_urgency: HashMap<u8, usize> = HashMap::new();
2429        for &sid in self.priorities_buf.iter() {
2430            let (urgency, is_incremental) = self.prioriser.get(&sid);
2431            if !is_incremental {
2432                continue;
2433            }
2434            let Some(&gid) = self.streams.get(&sid) else {
2435                continue;
2436            };
2437            let wbuffer = match self.position {
2438                Position::Server => &context.streams[gid].back,
2439                Position::Client(..) => &context.streams[gid].front,
2440            };
2441            if wbuffer.is_main_phase()
2442                || (wbuffer.is_terminated() && !wbuffer.is_completed())
2443                || (wbuffer.is_error() && !self.rst_sent.contains(&sid))
2444            {
2445                *ready_incremental_by_urgency.entry(urgency).or_insert(0) += 1;
2446            }
2447        }
2448
2449        trace!(
2450            "{} PRIORITIES: {:?} (incremental_count={}, per_bucket={:?})",
2451            log_context!(self),
2452            self.priorities_buf,
2453            incremental_count,
2454            ready_incremental_by_urgency
2455        );
2456        let mut socket_write = false;
2457        // RFC 9218 §4 round-robin: remember the first incremental stream we
2458        // served this pass so we can advance `Prioriser::incremental_cursor`
2459        // to it, causing the next pass to start with the stream just after.
2460        let mut first_incremental_fired: Option<StreamId> = None;
2461        // Total outbound bytes emitted across all stream flushes this pass —
2462        // `finalize_write` uses this to distinguish a voluntary scheduler
2463        // yield (progress + pending back-buffer, LIFECYCLE §9 invariant 16)
2464        // from a no-progress wait state (e.g. flow-control starvation).
2465        let mut total_bytes_written: usize = 0;
2466        // Collect every fresh RST_STREAM emitted via the converter
2467        // (`initialize` chokepoint or the HPACK over-budget abort path)
2468        // so we can run `account_emitted_rst` for each one AFTER the
2469        // converter is dropped — the converter holds `&mut self.encoder`
2470        // for the loop body so we cannot take `&mut self` until then.
2471        let mut freshly_emitted_rsts: Vec<H2Error> = Vec::new();
2472        'outer: for idx in 0..self.priorities_buf.len() {
2473            let stream_id = self.priorities_buf[idx];
2474            let Some(&global_stream_id) = self.streams.get(&stream_id) else {
2475                error!(
2476                    "{} stream_id {} from sorted keys missing in streams map",
2477                    log_context!(self),
2478                    stream_id
2479                );
2480                continue;
2481            };
2482            let (urgency, is_incremental) = self.prioriser.get(&stream_id);
2483            let stream = &mut context.streams[global_stream_id];
2484            let stream_state = stream.state;
2485            let parts = stream.split(&self.position);
2486            let kawa = parts.wbuffer;
2487            if kawa.is_main_phase()
2488                || (kawa.is_terminated() && !kawa.is_completed())
2489                || (kawa.is_error() && !self.rst_sent.contains(&stream_id))
2490            {
2491                let window = min(*parts.window, self.flow_control.window);
2492                converter.window = window;
2493                converter.stream_id = stream_id;
2494                // RFC 9218 §4: incremental streams yield the converter after
2495                // a single DATA frame so same-urgency peers interleave.
2496                converter.incremental_mode = is_incremental;
2497                // Same-urgency-bucket ready-peer count (Tier 3a, LIFECYCLE §9
2498                // invariant 17). The converter skips the yield when there is
2499                // no peer in the same bucket to interleave with — prevents
2500                // the `finalize_write` WRITABLE-withdrawal strand (see
2501                // `test_h2_solo_incremental_drains_fully`). A connection-wide
2502                // count would wrongly yield for a solo incremental stream
2503                // when another urgency bucket happens to contain an
2504                // incremental peer.
2505                converter.incremental_peer_count = ready_incremental_by_urgency
2506                    .get(&urgency)
2507                    .copied()
2508                    .unwrap_or(0);
2509                // Track RST_STREAM dedup: if kawa is in error state, the converter
2510                // will generate a RST_STREAM frame via `initialize`. Mark it so we
2511                // don't send a duplicate on the next writable cycle.
2512                if kawa.is_error() {
2513                    let freshly_rst = self.rst_sent.insert(stream_id);
2514                    // LIFECYCLE §9 invariant 17: any transition to ineligible
2515                    // mid-pass MUST decrement ready_incremental_by_urgency so
2516                    // later streams in the same 'outer iteration see the live
2517                    // count, not the snapshot. Missing this costs one voluntary
2518                    // yield per same-urgency peer that trails the RST.
2519                    if freshly_rst && is_incremental {
2520                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2521                            *c = c.saturating_sub(1);
2522                        }
2523                    }
2524                    // Account for the RST that `initialize` is about to emit
2525                    // for this stream. Without this the MadeYouReset lifetime
2526                    // cap is evadable: any path that flips `parsing_phase` to
2527                    // Error before reaching this gate (oversized inbound
2528                    // trailers, malformed bodies, etc.) would land an
2529                    // unaccounted RST on the wire. We defer the actual
2530                    // accounting call until after `drop(converter)` — the
2531                    // converter holds `&mut self.encoder` here.
2532                    if freshly_rst {
2533                        freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
2534                    }
2535                }
2536                // Apply per-frontend response-side header edits
2537                // (set/replace/delete) stashed by the routing layer at
2538                // request time. H2 frontends always run as Server
2539                // position; the back-side H2 client (when sozu speaks
2540                // H2 to a backend) is a request emission and was
2541                // already mutated by Router::route_from_request.
2542                //
2543                // The snapshot is **drained** via `mem::take` so the
2544                // injection runs exactly once per response. Without
2545                // this, a re-entry of `write_streams` for the same
2546                // stream (multi-frame body, flow-control yield, or
2547                // RFC 9218 same-urgency round-robin) would re-call
2548                // `apply_response_header_edits` after `kawa.prepare`
2549                // had already consumed the `Block::Flags{end_header}`
2550                // anchor — the helper falls back to
2551                // `kawa.blocks.len()` and appends the edit AFTER all
2552                // remaining DATA blocks. The next prepare cycle then
2553                // encodes that orphan `Block::Header` into
2554                // `H2BlockConverter.out` with no closing
2555                // `Block::Flags{end_header}` to flush it as a HEADERS
2556                // frame, and `H2BlockConverter::finalize` trips the
2557                // "out buffer not empty (38 bytes remaining), clearing"
2558                // defense-in-depth log on every re-entry. 38 bytes is
2559                // the static-table HPACK encoding of a typical HSTS
2560                // header, which is how the symptom surfaces in
2561                // production once the listener-default HSTS reaches a
2562                // non-trivial share of frontends.
2563                if matches!(self.position, super::Position::Server)
2564                    && !parts.context.headers_response.is_empty()
2565                {
2566                    let edits = std::mem::take(&mut parts.context.headers_response);
2567                    super::shared::apply_response_header_edits(kawa, &edits);
2568                }
2569                kawa.prepare(&mut converter);
2570                // The pre-prepare gate at line 2483 only inserts into
2571                // `rst_sent` when `kawa.is_error()` is already true on
2572                // entry. The HPACK over-budget abort path
2573                // (`H2BlockConverter::check_header_capacity` →
2574                // `finalize`) flips `parsing_phase` to Error AND pushes
2575                // its own RST_STREAM frame inside this same prepare
2576                // pass; without a post-prepare insert here the next
2577                // writable cycle would gate-pass and double-emit a
2578                // RST_STREAM via the existing `initialize` chokepoint.
2579                //
2580                // Per Codex P2: the converter's direct RST emission
2581                // bypasses the metric/flood accounting that
2582                // `Self::reset_stream` performs. Mirror it here so a
2583                // peer that drives oversized headers across many
2584                // streams cannot escape the MadeYouReset emitted-RST
2585                // lifetime cap and so dashboards see the per-error
2586                // counter and the global tx counter.
2587                //
2588                // Per Codex P3: when an incremental stream flips to
2589                // Error mid-prepare, the RFC 9218 §4 yield-after-one
2590                // accounting must drop this stream from the
2591                // same-urgency ready bucket so trailing peers see the
2592                // live count.
2593                let freshly_rst_post_prepare = kawa.is_error() && self.rst_sent.insert(stream_id);
2594                if freshly_rst_post_prepare {
2595                    // Defer accounting until after `drop(converter)`; same
2596                    // reason as the pre-prepare collector above.
2597                    freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
2598                    if is_incremental {
2599                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2600                            *c = c.saturating_sub(1);
2601                        }
2602                    }
2603                }
2604                let consumed = window - converter.window;
2605                *parts.window = parts.window.saturating_sub(consumed);
2606                self.flow_control.window = self.flow_control.window.saturating_sub(consumed);
2607                if is_incremental && consumed > 0 && first_incremental_fired.is_none() {
2608                    first_incremental_fired = Some(stream_id);
2609                }
2610            }
2611            context.debug.push(DebugEvent::S(
2612                stream_id,
2613                global_stream_id,
2614                kawa.parsing_phase,
2615                kawa.blocks.len(),
2616                kawa.out.len(),
2617            ));
2618            let mut stream_bytes: usize = 0;
2619            let outcome = Self::flush_stream_out(
2620                &mut self.socket,
2621                kawa,
2622                parts.metrics,
2623                &self.position,
2624                &mut self.readiness,
2625                &mut context.debug,
2626                3,
2627                global_stream_id,
2628                Some(&mut socket_write),
2629                None,
2630                &mut io_slices,
2631                Some(&mut stream_bytes),
2632            );
2633            // Refresh the per-stream idle timer on outbound bytes. Without
2634            // this, a long-running response trickled at low bandwidth would
2635            // be killed by `cancel_timed_out_streams` mid-delivery — the
2636            // inbound-only refresh at h2.rs:3887-3895 / 4026-4031 never
2637            // fires while the peer is idle.
2638            if stream_bytes > 0 {
2639                if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2640                    *t = Instant::now();
2641                }
2642            }
2643            total_bytes_written = total_bytes_written.saturating_add(stream_bytes);
2644            if outcome == FlushOutcome::Stalled {
2645                self.expect_write = Some(H2StreamId::Other {
2646                    id: stream_id,
2647                    gid: global_stream_id,
2648                });
2649                break 'outer;
2650            }
2651            self.expect_write = None;
2652            if (kawa.is_terminated() || kawa.is_error())
2653                && kawa.is_completed()
2654                && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2655            {
2656                let close_frontend =
2657                    matches!(self.position, Position::Server) && !parts.context.keep_alive_frontend;
2658                let (client_rtt, server_rtt) = Self::snapshot_rtts(
2659                    &self.position,
2660                    &self.socket,
2661                    &endpoint,
2662                    stream.linked_token(),
2663                );
2664
2665                if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2666                    &self.position,
2667                    &mut self.bytes,
2668                    &self.streams,
2669                    stream,
2670                    global_stream_id,
2671                    stream_id,
2672                    byte_totals,
2673                    &mut context.debug,
2674                    context.listener.clone(),
2675                    client_rtt,
2676                    server_rtt,
2677                ) {
2678                    completed_streams.push((dead_id, global_stream_id, token, close_frontend));
2679                    // LIFECYCLE §9 invariant 17: decrement INSIDE 'outer so
2680                    // later iterations see the reduced count. The post-loop
2681                    // retirement at remove_dead_stream is too late.
2682                    if is_incremental {
2683                        if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2684                            *c = c.saturating_sub(1);
2685                        }
2686                    }
2687                }
2688            }
2689        }
2690        gauge!(
2691            "h2.streams.ready_incremental.by_urgency",
2692            ready_incremental_by_urgency
2693                .values()
2694                .copied()
2695                .sum::<usize>()
2696        );
2697        // Reclaim the converter's reusable buffers before any &mut self calls,
2698        // since the converter borrows self.encoder.
2699        let converter_out = std::mem::take(&mut converter.out);
2700        let lowercase_buf = std::mem::take(&mut converter.lowercase_buf);
2701        let cookie_buf = std::mem::take(&mut converter.cookie_buf);
2702        // RFC 7541 §6.3: clear our mirror of the pending size-update only
2703        // AFTER the converter confirmed the signal was emitted to its
2704        // output buffer. A DATA-only pass leaves `size_update_emitted` as
2705        // `false` so the signal stays queued for the next pass with a
2706        // header block.
2707        let size_update_emitted = converter.size_update_emitted;
2708        drop(converter);
2709        if size_update_emitted {
2710            self.pending_table_size_update = None;
2711        }
2712        // Account every RST that the converter emitted during this pass
2713        // (pre-prepare gate + post-prepare HPACK over-budget abort) so
2714        // the global tx counter, the per-error breakdown, and the
2715        // MadeYouReset emitted-RST lifetime cap stay in step. If the
2716        // cap trips, propagate the GOAWAY result.
2717        for error in freshly_emitted_rsts {
2718            if let Some(result) = self.account_emitted_rst(error) {
2719                return result;
2720            }
2721        }
2722        self.converter_buf = converter_out;
2723        self.lowercase_buf = lowercase_buf;
2724        self.cookie_buf = cookie_buf;
2725        self.shrink_converter_buffers();
2726        // RFC 9218 §4: commit the round-robin cursor so the next writable
2727        // cycle begins with the stream immediately after the one we fired
2728        // first this pass.
2729        self.prioriser
2730            .advance_incremental_cursor(first_incremental_fired);
2731        let mut close_frontend_after_completed_stream = false;
2732        for (dead_id, global_stream_id, token, close_frontend) in completed_streams {
2733            // The main write loop borrows self.encoder, so we can't mutate the
2734            // H2 maps inline. Retire the recycled stream immediately after the
2735            // converter borrow ends, before endpoint.end_stream() can trigger
2736            // teardown and observe a stale `Recycle` entry in self.streams.
2737            self.remove_dead_stream(dead_id, global_stream_id);
2738            close_frontend_after_completed_stream |= close_frontend;
2739            if let Some(token) = token {
2740                remove_backend_stream(&mut context.backend_streams, token, global_stream_id);
2741                endpoint.end_stream(token, global_stream_id, context);
2742            }
2743        }
2744        if close_frontend_after_completed_stream && !self.drain.draining {
2745            return if self.streams.is_empty() {
2746                self.goaway(H2Error::NoError)
2747            } else {
2748                self.graceful_goaway()
2749            };
2750        }
2751        self.finalize_write(socket_write, total_bytes_written, context)
2752    }
2753
2754    /// Remove streams that completed their lifecycle from all tracking maps.
2755    /// After forwarding a 1xx informational response (100 Continue, 103 Early Hints),
2756    /// reset the back buffer and re-enable backend readable so the final response
2757    /// can arrive on the same stream. Returns true if the response was 1xx.
2758    #[allow(clippy::too_many_arguments)]
2759    fn flush_stream_out(
2760        socket: &mut Front,
2761        kawa: &mut GenericHttpStream,
2762        metrics: &mut SessionMetrics,
2763        position: &Position,
2764        readiness: &mut Readiness,
2765        debug: &mut DebugHistory,
2766        debug_site: usize,
2767        global_stream_id: GlobalStreamId,
2768        mut wrote: Option<&mut bool>,
2769        cross_read_amount: Option<usize>,
2770        io_slices: &mut Vec<IoSlice<'static>>,
2771        mut bytes_written: Option<&mut usize>,
2772    ) -> FlushOutcome {
2773        while !kawa.out.is_empty() {
2774            if let Some(flag) = wrote.as_deref_mut() {
2775                *flag = true;
2776            }
2777            io_slices.clear();
2778            let buffer = kawa.storage.buffer();
2779            for block in kawa.out.iter() {
2780                match block {
2781                    kawa::OutBlock::Delimiter => break,
2782                    kawa::OutBlock::Store(store) => {
2783                        let data = store.data(buffer);
2784                        // SAFETY: the IoSlice references point into kawa's
2785                        // storage buffer. They are used only for the
2786                        // socket_write_vectored call below and cleared
2787                        // immediately after, before kawa.consume() which may
2788                        // relocate the buffer via ptr::copy (shift). No
2789                        // dangling 'static refs exist during consume().
2790                        let data: &'static [u8] =
2791                            unsafe { std::slice::from_raw_parts(data.as_ptr(), data.len()) };
2792                        io_slices.push(IoSlice::new(data));
2793                    }
2794                }
2795            }
2796            let (size, status) = socket.socket_write_vectored(io_slices);
2797            io_slices.clear();
2798            debug_assert!(
2799                io_slices.is_empty(),
2800                "IoSlice refs must be cleared before consume"
2801            );
2802            debug.push(DebugEvent::SocketIO(debug_site, global_stream_id, size));
2803            kawa.consume(size);
2804            position.count_bytes_out_counter(size);
2805            position.count_bytes_out(metrics, size);
2806            if let Some(counter) = bytes_written.as_deref_mut() {
2807                *counter = counter.saturating_add(size);
2808            }
2809            if let Some(amount) = cross_read_amount {
2810                // Resume path: same stream is parked waiting for buffer space.
2811                // Re-enable READABLE once the write freed enough room.
2812                if kawa.storage.available_space() >= amount {
2813                    readiness.interest.insert(Ready::READABLE);
2814                }
2815            }
2816            if update_readiness_after_write(size, status, readiness) {
2817                return FlushOutcome::Stalled;
2818            }
2819        }
2820        FlushOutcome::Drained
2821    }
2822
2823    fn handle_1xx_reset<E: Endpoint>(
2824        kawa: &mut GenericHttpStream,
2825        stream_state: StreamState,
2826        endpoint: &mut E,
2827    ) -> bool {
2828        let is_1xx = matches!(
2829            kawa.detached.status_line,
2830            kawa::StatusLine::Response { code, .. } if (100..200).contains(&code)
2831        );
2832        if !is_1xx {
2833            return false;
2834        }
2835        debug!(
2836            "{} H2 write_streams: 1xx informational forwarded, resetting back buffer",
2837            log_module_context!()
2838        );
2839        kawa.clear();
2840        if let StreamState::Linked(token) = stream_state {
2841            let readiness = endpoint.readiness_mut(token);
2842            readiness.interest.insert(Ready::READABLE);
2843            readiness.signal_pending_read();
2844        }
2845        true
2846    }
2847
2848    /// Re-arm edge-triggered WRITABLE event if rustls still has buffered TLS data.
2849    fn ensure_tls_flushed(&mut self) {
2850        if self.socket.socket_wants_write() {
2851            self.readiness.signal_pending_write();
2852        }
2853    }
2854
2855    /// Evict every per-stream piece of state carried by this `ConnectionH2`.
2856    ///
2857    /// **Invariant**: `rst_sent`, `stream_last_activity_at` and `prioriser`
2858    /// MUST be emptied of `stream_id` here — they are the only three
2859    /// per-stream caches that are not stored in the slab-allocated
2860    /// `Context.streams[]`. Forgetting any of them causes unbounded memory
2861    /// growth on long-lived connections with many cancelled streams. The
2862    /// `debug_assert`s below fail loudly in test builds if someone adds a
2863    /// new per-stream cache without updating this function.
2864    fn remove_dead_stream(&mut self, stream_id: StreamId, global_stream_id: GlobalStreamId) {
2865        if self.streams.remove(&stream_id).is_none() {
2866            error!(
2867                "{} dead stream_id {} missing from streams map",
2868                log_context!(self),
2869                stream_id
2870            );
2871        }
2872        self.rst_sent.remove(&stream_id);
2873        self.stream_last_activity_at.remove(&stream_id);
2874        self.prioriser.remove(&stream_id);
2875        debug_assert!(
2876            !self.rst_sent.contains(&stream_id),
2877            "rst_sent still contains stream_id {stream_id} after eviction"
2878        );
2879        debug_assert!(
2880            !self.stream_last_activity_at.contains_key(&stream_id),
2881            "stream_last_activity_at still contains stream_id {stream_id} after eviction"
2882        );
2883        // Invariant: expect_write/expect_read must not reference a gid whose
2884        // context slot may be popped by shrink_trailing_recycle after eviction.
2885        if matches!(self.expect_write, Some(H2StreamId::Other { gid, .. }) if gid == global_stream_id)
2886        {
2887            self.expect_write = None;
2888        }
2889        if matches!(
2890            self.expect_read,
2891            Some((H2StreamId::Other { gid, .. }, _)) if gid == global_stream_id
2892        ) {
2893            self.expect_read = None;
2894        }
2895    }
2896
2897    /// Drop stream-id mappings for streams that never became active before a
2898    /// connection-level close. This happens on incomplete/oversized header
2899    /// blocks: the stream slot is created on the initial HEADERS frame, then a
2900    /// GOAWAY closes the connection before the request is fully materialized.
2901    fn prune_inactive_streams_while_closing<L>(&mut self, context: &mut Context<L>)
2902    where
2903        L: ListenerHandler + L7ListenerHandler,
2904    {
2905        if !self.drain.draining || !matches!(self.state, H2State::GoAway | H2State::Error) {
2906            return;
2907        }
2908
2909        let stale_streams = self
2910            .streams
2911            .iter()
2912            .filter_map(|(&stream_id, &global_stream_id)| {
2913                (!context.streams[global_stream_id].state.is_open())
2914                    .then_some((stream_id, global_stream_id))
2915            })
2916            .collect::<Vec<_>>();
2917
2918        for (stream_id, global_stream_id) in stale_streams {
2919            let stream = &mut context.streams[global_stream_id];
2920            if stream.state == StreamState::Idle {
2921                stream.front.clear();
2922                stream.front.storage.clear();
2923                stream.back.clear();
2924                stream.back.storage.clear();
2925                stream.metrics.reset();
2926                stream.state = StreamState::Recycle;
2927            }
2928            self.remove_dead_stream(stream_id, global_stream_id);
2929        }
2930    }
2931
2932    /// Shrink reusable converter buffers when they grow beyond 16 KB to avoid
2933    /// holding memory after a burst of large headers.
2934    fn shrink_converter_buffers(&mut self) {
2935        if self.converter_buf.capacity() > 16_384 {
2936            self.converter_buf.shrink_to(4096);
2937        }
2938        if self.lowercase_buf.capacity() > 16_384 {
2939            self.lowercase_buf.shrink_to(4096);
2940        }
2941        if self.cookie_buf.capacity() > 16_384 {
2942            self.cookie_buf.shrink_to(4096);
2943        }
2944    }
2945
2946    /// Post-write phase: check drain completion, flush TLS, and update readiness.
2947    ///
2948    /// `bytes_written_this_pass` reports the total outbound bytes `write_streams`
2949    /// pushed to the socket (across every stream), and is used to distinguish
2950    /// two very different "no `expect_write`" states:
2951    ///
2952    /// - **Voluntary yield with progress**: at least one DATA/HEADERS frame
2953    ///   emitted, but a stream left non-empty `back.out`/`back.blocks` because
2954    ///   the converter yielded (e.g. RFC 9218 incremental rotation). LIFECYCLE
2955    ///   §9 invariant 16: keep `Ready::WRITABLE` armed so the session loop can
2956    ///   resume flushing on the next tick without waiting for an external
2957    ///   wake-up that edge-triggered epoll will not deliver.
2958    /// - **No progress at all**: converter pushed every block back (e.g. flow
2959    ///   window exhausted, no HEADERS ready yet). Strip `Ready::WRITABLE` —
2960    ///   forward progress must come from an external trigger
2961    ///   (`WINDOW_UPDATE`, new request), not from looping writable().
2962    ///
2963    /// Returns `MuxResult::Continue` in the normal case, or triggers a graceful
2964    /// GOAWAY when draining and all streams have completed.
2965    fn finalize_write<L>(
2966        &mut self,
2967        socket_write: bool,
2968        bytes_written_this_pass: usize,
2969        context: &mut Context<L>,
2970    ) -> MuxResult
2971    where
2972        L: ListenerHandler + L7ListenerHandler,
2973    {
2974        // RFC 9113 §6.8: if draining and all streams have completed,
2975        // send the final GOAWAY with the actual last_stream_id
2976        if self.drain.draining && self.streams.is_empty() {
2977            return self.graceful_goaway();
2978        }
2979
2980        if self.socket.socket_wants_write() {
2981            if !socket_write {
2982                self.socket.socket_write(&[]);
2983            }
2984            // Edge-triggered epoll: re-arm WRITABLE if rustls still has
2985            // pending encrypted data (first check triggers flush, second re-checks).
2986            self.ensure_tls_flushed();
2987        } else if self.expect_write.is_none() {
2988            // LIFECYCLE §9 invariant 16: retain `Ready::WRITABLE` when a
2989            // voluntary scheduler yield leaves stranded bytes in a stream's
2990            // `back.out`/`back.blocks` *after* the pass made forward
2991            // progress. Requiring progress avoids the degenerate no-progress
2992            // loop (e.g. flow-control-starved streams) that would otherwise
2993            // busy-spin against the session dispatcher.
2994            if bytes_written_this_pass > 0
2995                && any_stream_has_pending_back(&self.streams, &context.streams)
2996            {
2997                #[cfg(debug_assertions)]
2998                context.debug.push(DebugEvent::Str(
2999                    "finalize_write: invariant 16 retained WRITABLE (pending back-buffer)"
3000                        .to_owned(),
3001                ));
3002            } else if !self.pending_rst_streams.is_empty()
3003                || !self.flow_control.pending_window_updates.is_empty()
3004            {
3005                // Control-frame liveness: `flush_pending_control_frames` is
3006                // gated on `expect_write.is_none()`, so when a prior partial
3007                // write deferred the flush the RST / WINDOW_UPDATE queues
3008                // stay non-empty after `expect_write` finally drains. Without
3009                // this rearm the next tick would drop `Ready::WRITABLE` and
3010                // the queued RST would stall until an unrelated event
3011                // re-triggered writable — which is exactly the scenario
3012                // h2spec trips by sending back-to-back malformed streams.
3013                #[cfg(debug_assertions)]
3014                context.debug.push(DebugEvent::Str(
3015                    "finalize_write: retained WRITABLE (control queue non-empty)".to_owned(),
3016                ));
3017                self.readiness.arm_writable();
3018                incr!(names::h2::SIGNAL_WRITABLE_REARMED_CONTROL_QUEUE);
3019            } else {
3020                // We wrote everything
3021                #[cfg(debug_assertions)]
3022                context.debug.push(DebugEvent::Str(format!(
3023                    "Wrote everything: {:?}",
3024                    self.streams
3025                )));
3026                self.readiness.interest.remove(Ready::WRITABLE);
3027            }
3028        }
3029        MuxResult::Continue
3030    }
3031
3032    /// Flush pending control frames (zero-buffer resume, WINDOW_UPDATEs, RST_STREAMs)
3033    /// before entering the main writable state machine.
3034    ///
3035    /// Returns `Some(result)` if the caller should return early (e.g. socket would
3036    /// block, GOAWAY triggered), or `None` if writable() should proceed normally.
3037    fn flush_pending_control_frames(&mut self) -> Option<MuxResult> {
3038        if self.frontend_hung_up_while_draining() {
3039            self.expect_write = None;
3040            self.zero.storage.clear();
3041            self.flow_control.pending_window_updates.clear();
3042            self.pending_rst_streams.clear();
3043        }
3044
3045        // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
3046        if let Some(sent_at) = self.settings_sent_at {
3047            if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
3048                warn!(
3049                    "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
3050                    log_context!(self),
3051                    SETTINGS_ACK_TIMEOUT
3052                );
3053                return Some(self.goaway(H2Error::SettingsTimeout));
3054            }
3055        }
3056
3057        // Stage — resume zero-buffer flush.
3058        // If a previous write was partial, finish it before serialising any
3059        // new control frames. Don't reset the timeout for control frame
3060        // writes (SETTINGS ACK, PING response, WINDOW_UPDATE) — only
3061        // application-data writes should reset it.
3062        if let Some(H2StreamId::Zero) = self.expect_write {
3063            if self.flush_zero_to_socket() {
3064                self.ensure_tls_flushed();
3065                return Some(MuxResult::Continue);
3066            }
3067            // When H2StreamId::Zero is used to write, READABLE is disabled —
3068            // re-enable it now that the flush is complete.
3069            self.readiness.interest.insert(Ready::READABLE);
3070            self.expect_write = None;
3071        }
3072
3073        // Stage — drain pending WINDOW_UPDATE frames.
3074        // Serialize and flush them inline to avoid extra event loop
3075        // iterations that could cause response data to be sent before
3076        // subsequent frames are validated.
3077        if !self.flow_control.pending_window_updates.is_empty() && self.expect_write.is_none() {
3078            let kawa = &mut self.zero;
3079            kawa.storage.clear();
3080            let buf = kawa.storage.space();
3081            let mut offset = 0;
3082            // Track which entries we successfully serialized so we can remove them.
3083            // Each WINDOW_UPDATE frame is 13 bytes (9-byte header + 4-byte payload).
3084            let mut written_ids = Vec::new();
3085            for (&stream_id, &increment) in &self.flow_control.pending_window_updates {
3086                if increment == 0 {
3087                    written_ids.push(stream_id);
3088                    continue;
3089                }
3090                match serializer::gen_window_update(&mut buf[offset..], stream_id, increment) {
3091                    Ok((_, size)) => {
3092                        offset += size;
3093                        written_ids.push(stream_id);
3094                        incr!(names::h2::FRAMES_TX_WINDOW_UPDATE);
3095                    }
3096                    Err(_) => {
3097                        // Buffer full — stop here, remaining entries stay in the map
3098                        break;
3099                    }
3100                }
3101            }
3102            // Remove only the entries we successfully wrote (or skipped)
3103            for id in written_ids {
3104                self.flow_control.pending_window_updates.remove(&id);
3105            }
3106            if offset > 0 {
3107                kawa.storage.fill(offset);
3108                if self.flush_zero_to_socket() {
3109                    self.expect_write = Some(H2StreamId::Zero);
3110                    // Edge-triggered epoll: ensure pending TLS data gets flushed
3111                    if self.socket.socket_wants_write() {
3112                        self.readiness.event.insert(Ready::WRITABLE);
3113                    }
3114                    return Some(MuxResult::Continue);
3115                }
3116            }
3117        }
3118
3119        // Stage — RST_STREAM cap check + drain.
3120        // Check the lifetime total (not just pending queue length) because
3121        // writable() drains the queue between readable() calls, so the
3122        // pending count alone may never reach the cap even under sustained
3123        // misbehavior.
3124        if !matches!(self.state, H2State::GoAway | H2State::Error)
3125            && self.total_rst_streams_queued >= MAX_PENDING_RST_STREAMS
3126        {
3127            error!(
3128                "{} total RST_STREAM count {} exceeds cap {}, sending GOAWAY(ENHANCE_YOUR_CALM)",
3129                log_context!(self),
3130                self.total_rst_streams_queued,
3131                MAX_PENDING_RST_STREAMS
3132            );
3133            return Some(self.goaway(H2Error::EnhanceYourCalm));
3134        }
3135
3136        // Flush pending RST_STREAM frames (queued when refusing streams).
3137        // Accounting happens at queue-time inside `Self::enqueue_rst`, so
3138        // this drain only serialises and flushes — no metric/flood calls
3139        // here would double-count.
3140        if !self.pending_rst_streams.is_empty() && self.expect_write.is_none() {
3141            let kawa = &mut self.zero;
3142            kawa.storage.clear();
3143            let buf = kawa.storage.space();
3144            let mut offset = 0;
3145            let mut written_count = 0;
3146            for &(stream_id, ref error) in &self.pending_rst_streams {
3147                let frame_size =
3148                    parser::FRAME_HEADER_SIZE + parser::RST_STREAM_PAYLOAD_SIZE as usize;
3149                if offset + frame_size > buf.len() {
3150                    break;
3151                }
3152                match serializer::gen_rst_stream(&mut buf[offset..], stream_id, error.to_owned()) {
3153                    Ok((_, _)) => {
3154                        offset += frame_size;
3155                        written_count += 1;
3156                    }
3157                    Err(_) => break,
3158                }
3159            }
3160            self.pending_rst_streams.drain(..written_count);
3161            if offset > 0 {
3162                kawa.storage.fill(offset);
3163                if self.flush_zero_to_socket() {
3164                    self.expect_write = Some(H2StreamId::Zero);
3165                    // Edge-triggered epoll: ensure pending TLS data gets flushed
3166                    if self.socket.socket_wants_write() {
3167                        self.readiness.event.insert(Ready::WRITABLE);
3168                    }
3169                    return Some(MuxResult::Continue);
3170                }
3171            }
3172        }
3173
3174        None
3175    }
3176
    /// Handle a writable event for this H2 connection.
    ///
    /// Processing order:
    /// 1. `prune_inactive_streams_while_closing` is run on `context`.
    /// 2. `flush_pending_control_frames` is called; if it yields a result,
    ///    that result is returned immediately and nothing else runs.
    /// 3. If the socket reports pending output (`socket_wants_write`), an
    ///    empty `socket_write` is issued to push it out.
    /// 4. The `(state, position)` pair selects the state-specific behaviour
    ///    (handshake steps, GOAWAY/error teardown, or `write_streams` for the
    ///    proxying states).
    ///
    /// Returns the [`MuxResult`] produced by whichever step terminates the
    /// call.
    pub fn writable<E, L>(&mut self, context: &mut Context<L>, endpoint: E) -> MuxResult
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        self.prune_inactive_streams_while_closing(context);

        // Control frames take precedence over everything else: when the
        // helper returns a result, this writable pass ends here.
        if let Some(result) = self.flush_pending_control_frames() {
            return result;
        }

        // Flush any pending TLS records before state-specific processing.
        // This ensures response DATA frames that were accepted by rustls
        // (via socket_write_vectored in write_streams) are pushed to the
        // TCP socket even when the connection is in GoAway or Error state.
        // Without this, the state-specific handlers may call force_disconnect()
        // before the response data reaches the kernel's TCP send buffer.
        if self.socket.socket_wants_write() {
            self.socket.socket_write(&[]);
        }

        match (&self.state, &self.position) {
            // Server-side error: stay alive only while the socket still has
            // buffered output to push; otherwise close the session.
            (H2State::Error, Position::Server) => {
                if self.socket.socket_wants_write() {
                    self.ensure_tls_flushed();
                    MuxResult::Continue
                } else {
                    MuxResult::CloseSession
                }
            }
            // Combinations that are not expected on the write path: log and
            // disconnect.
            (H2State::Error, _)
            | (H2State::ClientSettings, Position::Server)
            | (H2State::ServerSettings, Position::Client(..)) => {
                error!(
                    "{} Unexpected combination: (Writable, {:?}, {:?})",
                    log_context!(self),
                    self.state,
                    self.position
                );
                self.force_disconnect()
            }
            // Server side in ClientPreface: nothing is written by this arm.
            (H2State::ClientPreface, Position::Server) => MuxResult::Continue,
            // Discard state: pending data (e.g. RST_STREAM) was already
            // written in the preamble above; let the readable path consume
            // the remaining frame payload.
            (H2State::Discard, _) => MuxResult::Continue,
            (H2State::GoAway, _) => {
                if self.peer_gone_after_final_goaway() {
                    return MuxResult::CloseSession;
                }
                // Flush any remaining TLS response data before disconnecting.
                // The GoAway state only enters after control frames (our GOAWAY
                // response) are flushed above, but response DATA frames may still
                // be in rustls's TLS output buffer — accepted by socket_write_vectored
                // during write_streams() but not yet flushed to TCP. Under TCP
                // backpressure (HAProxy chain), this is the primary truncation vector.
                if self.socket.socket_wants_write() {
                    self.socket.socket_write(&[]);
                    if self.socket.socket_wants_write() {
                        // TLS data still pending (TCP backpressure) — don't disconnect
                        // yet. Re-arm WRITABLE so the event loop retries the flush.
                        self.ensure_tls_flushed();
                        return MuxResult::Continue;
                    }
                }
                self.force_disconnect()
            }
            // Client side, first step of the handshake: stage the connection
            // preface followed by our SETTINGS frame in the stream-zero buffer.
            (H2State::ClientPreface, Position::Client(..)) => {
                trace!("{} Preparing preface and settings", log_context!(self));
                let pri = serializer::H2_PRI.as_bytes();
                let kawa = &mut self.zero;

                // NOTE(review): the buffer is not cleared first and the slice
                // index panics if fewer than `pri.len()` bytes are free —
                // presumably the buffer is always empty here; confirm.
                kawa.storage.space()[0..pri.len()].copy_from_slice(pri);
                kawa.storage.fill(pri.len());
                match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
                    Ok((_, size)) => {
                        kawa.storage.fill(size);
                        incr!(names::h2::FRAMES_TX_SETTINGS);
                        // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
                        self.settings_sent_at = Some(Instant::now());
                    }
                    Err(error) => {
                        error!(
                            "{} Could not serialize SettingsFrame: {:?}",
                            log_context!(self),
                            error
                        );
                        return self.force_disconnect();
                    }
                };

                self.state = H2State::ClientSettings;
                self.expect_write = Some(H2StreamId::Zero);
                MuxResult::Continue
            }
            // Client side, second step: move on to waiting for the server's
            // SETTINGS and stop asking for writable events.
            (H2State::ClientSettings, Position::Client(..)) => {
                trace!("{} Sent preface and settings", log_context!(self));
                self.state = H2State::ServerSettings;
                // Expect 9 bytes on stream zero next — presumably the H2
                // frame-header length; confirm against parser::FRAME_HEADER_SIZE.
                self.expect_read = Some((H2StreamId::Zero, 9));
                self.readiness.interest.remove(Ready::WRITABLE);
                MuxResult::Continue
            }
            (H2State::ServerSettings, Position::Server) => {
                // Enlarge the connection-level receive window beyond the RFC default
                // of 65 535 bytes. The default window size is too small for
                // high-throughput proxying and causes excessive WINDOW_UPDATE
                // round-trips. Use additive increment rather than unconditional
                // assignment to preserve any window changes that occurred during
                // setup. Skip if the configured window does not exceed the default
                // (no enlargement needed), since a zero-increment WINDOW_UPDATE
                // violates RFC 9113 §6.9.
                let increment = self
                    .connection_config
                    .initial_connection_window
                    .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
                if increment > 0 {
                    self.queue_window_update(0, increment);
                }
                // Do NOT increment flow_control.window here: sending our own
                // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
                // Our send window is only updated by WINDOW_UPDATEs we receive
                // from the peer (RFC 9113 §6.9).
                self.expect_header();
                // Keep WRITABLE so the queued WINDOW_UPDATE gets flushed.
                MuxResult::Continue
            }
            // Proxying states — writing application data (request/response).
            // Reset the timeout here, not at the top of writable(), so that
            // control frame writes (PING, WINDOW_UPDATE) don't reset it.
            // NOTE(review): no reset is visible in this arm; presumably it
            // happens inside write_streams() — confirm.
            (H2State::Header, _)
            | (H2State::Frame(_), _)
            | (H2State::ContinuationFrame(_), _)
            | (H2State::ContinuationHeader(_), _) => self.write_streams(context, endpoint),
        }
    }
3312
3313    /// Snapshot the access-log RTTs for the local frontend and the linked backend.
3314    ///
3315    /// `Position::Server`-only. On a backend H2 connection (`Position::Client`)
3316    /// the snapshot would write swapped values onto the shared `Stream.metrics`:
3317    /// the connection's `socket` is the upstream and the corresponding
3318    /// `EndpointServer::socket` returns the frontend, so the per-stream
3319    /// `client_rtt`/`server_rtt` cells would be populated with mislabelled
3320    /// values. Gating keeps backend H2 from poisoning the access-log metric
3321    /// for the matching frontend stream.
3322    ///
3323    /// Callers must invoke this BEFORE `endpoint.end_stream(...)` on reset
3324    /// paths so the backend lookup does not depend on
3325    /// `EndpointClient::end_stream` continuing to leave entries in
3326    /// `Router.backends`.
3327    ///
3328    /// Takes individual field references (not `&self`) for the same reason
3329    /// `try_recycle_server_stream` does — to avoid borrow conflicts with the
3330    /// `H2BlockConverter` that holds `&mut self.encoder` during the per-stream
3331    /// write loop.
3332    fn snapshot_rtts<E: Endpoint>(
3333        position: &Position,
3334        socket: &Front,
3335        endpoint: &E,
3336        linked_token: Option<mio::Token>,
3337    ) -> (Option<Duration>, Option<Duration>) {
3338        if !position.is_server() {
3339            return (None, None);
3340        }
3341        (
3342            socket_rtt(socket.socket_ref()),
3343            linked_token
3344                .and_then(|t| endpoint.socket(t))
3345                .and_then(socket_rtt),
3346        )
3347    }
3348
3349    /// Try to recycle a completed server-side stream by distributing overhead,
3350    /// generating access logs, and transitioning the stream to `Recycle` state.
3351    ///
3352    /// Returns `Some((stream_id, Option<token>))` if the stream was recycled, so the
3353    /// caller can add `stream_id` to the dead-streams list and call `endpoint.end_stream()`
3354    /// if a token was returned. Returns `None` if recycling was deferred or not applicable.
3355    ///
3356    /// Takes individual field references instead of `&mut self` to avoid borrow
3357    /// conflicts when the H2 block converter holds `&mut self.encoder`.
3358    /// `client_rtt`/`server_rtt` are snapshotted by the caller (which still
3359    /// owns `&self.socket` and `&endpoint`) and forwarded into the access log.
3360    #[allow(clippy::too_many_arguments)]
3361    fn try_recycle_server_stream<L>(
3362        position: &Position,
3363        bytes: &mut H2ByteAccounting,
3364        streams: &HashMap<StreamId, GlobalStreamId>,
3365        stream: &mut crate::protocol::mux::Stream,
3366        global_stream_id: GlobalStreamId,
3367        stream_id: StreamId,
3368        byte_totals: (usize, usize),
3369        debug: &mut DebugHistory,
3370        listener: std::rc::Rc<std::cell::RefCell<L>>,
3371        client_rtt: Option<Duration>,
3372        server_rtt: Option<Duration>,
3373    ) -> Option<(StreamId, Option<mio::Token>)>
3374    where
3375        L: ListenerHandler + L7ListenerHandler,
3376    {
3377        match position {
3378            Position::Client(..) => None,
3379            Position::Server => {
3380                // Already logged by a reset path; retire the stream after its RST is flushed.
3381                if stream.metrics.start.is_none() {
3382                    let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3383                    return match state {
3384                        StreamState::Linked(token) => Some((stream_id, Some(token))),
3385                        _ => Some((stream_id, None)),
3386                    };
3387                }
3388
3389                // Don't recycle if the client hasn't sent END_STREAM yet —
3390                // more DATA frames may arrive for this stream.
3391                if !stream.front_received_end_of_stream {
3392                    trace!(
3393                        "{} Defer recycle stream {}: client still sending",
3394                        log_module_context!(),
3395                        global_stream_id
3396                    );
3397                    return None;
3398                }
3399                let stream_bytes = (
3400                    stream.metrics.bin + stream.metrics.backend_bin,
3401                    stream.metrics.bout + stream.metrics.backend_bout,
3402                );
3403                distribute_overhead(
3404                    &mut stream.metrics,
3405                    &mut bytes.overhead_bin,
3406                    &mut bytes.overhead_bout,
3407                    stream_bytes,
3408                    byte_totals,
3409                    streams.len(),
3410                    streams.len() == 1,
3411                );
3412                debug.push(DebugEvent::StreamEvent(4, global_stream_id));
3413                trace!(
3414                    "{} Recycle stream: {}",
3415                    log_module_context!(),
3416                    global_stream_id
3417                );
3418                let token = Self::complete_server_stream(stream, listener, client_rtt, server_rtt);
3419                Some((stream_id, token))
3420            }
3421        }
3422    }
3423
3424    /// Finalize a server-side stream after its response has been fully written.
3425    ///
3426    /// Generates an access log, resets metrics, and transitions the stream to `Recycle`.
3427    /// Returns the backend token if the stream was `Linked`, so the caller can call
3428    /// `endpoint.end_stream()` with the full `Context` (which can't be passed here
3429    /// because `stream` borrows from `context.streams`).
3430    ///
3431    /// Callers must distribute overhead *before* calling this, since the converter
3432    /// borrow may prevent `distribute_overhead()`.
3433    fn complete_server_stream<L>(
3434        stream: &mut crate::protocol::mux::Stream,
3435        listener: std::rc::Rc<std::cell::RefCell<L>>,
3436        client_rtt: Option<Duration>,
3437        server_rtt: Option<Duration>,
3438    ) -> Option<mio::Token>
3439    where
3440        L: ListenerHandler + L7ListenerHandler,
3441    {
3442        incr!(names::http::E2E_H2);
3443        stream.metrics.backend_stop();
3444        stream.generate_access_log(
3445            false,
3446            Some("H2::Complete"),
3447            listener,
3448            client_rtt,
3449            server_rtt,
3450        );
3451        stream.metrics.reset();
3452        let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3453        if let StreamState::Linked(token) = state {
3454            Some(token)
3455        } else {
3456            None
3457        }
3458    }
3459
3460    /// Compute the total bytes transferred across all active streams.
3461    ///
3462    /// Returns `(total_bytes_in, total_bytes_out)` where bytes_in = `bin + backend_bin`
3463    /// and bytes_out = `bout + backend_bout` for each stream.
3464    fn compute_stream_byte_totals<L: ListenerHandler + L7ListenerHandler>(
3465        &self,
3466        context: &Context<L>,
3467    ) -> (usize, usize) {
3468        let mut total_in = 0usize;
3469        let mut total_out = 0usize;
3470        for &gid in self.streams.values() {
3471            let m = &context.streams[gid].metrics;
3472            total_in += m.bin + m.backend_bin;
3473            total_out += m.bout + m.backend_bout;
3474        }
3475        (total_in, total_out)
3476    }
3477
3478    /// Distribute connection-level byte overhead proportionally to a single stream.
3479    ///
3480    /// `totals` should be pre-computed via [`compute_stream_byte_totals`] **before**
3481    /// taking a mutable borrow on the target stream, to avoid borrow conflicts.
3482    /// Delegates to the free function [`distribute_overhead`].
3483    fn distribute_overhead(&mut self, metrics: &mut SessionMetrics, totals: (usize, usize)) {
3484        let stream_bytes = (
3485            metrics.bin + metrics.backend_bin,
3486            metrics.bout + metrics.backend_bout,
3487        );
3488        distribute_overhead(
3489            metrics,
3490            &mut self.bytes.overhead_bin,
3491            &mut self.bytes.overhead_bout,
3492            stream_bytes,
3493            totals,
3494            self.streams.len(),
3495            self.streams.len() <= 1,
3496        );
3497    }
3498
3499    /// Attribute accumulated `zero_bytes_read` to the stream or to connection overhead.
3500    fn attribute_bytes_to_stream(&mut self, metrics: &mut SessionMetrics) {
3501        self.position
3502            .count_bytes_in(metrics, self.bytes.zero_bytes_read);
3503        self.bytes.zero_bytes_read = 0;
3504    }
3505
3506    fn attribute_bytes_to_overhead(&mut self) {
3507        self.bytes.overhead_bin += self.bytes.zero_bytes_read;
3508        self.bytes.zero_bytes_read = 0;
3509    }
3510
3511    /// Queue a WINDOW_UPDATE, coalescing with any existing entry for the same stream_id.
3512    /// RFC 9113 §6.9.1: window size increment MUST be 1..2^31-1 (0x7FFFFFFF).
3513    ///
3514    /// Always signals pending write so callers don't have to remember the
3515    /// edge-triggered epoll invariant (see memory feedback_epollet_signal_pending_write):
3516    /// under ET epoll a queued WINDOW_UPDATE without a live WRITABLE event bit
3517    /// is invisible to filter_interest() and will never get flushed.
3518    fn queue_window_update(&mut self, stream_id: u32, increment: u32) {
3519        let max_increment = i32::MAX as u32;
3520        if let Some(existing) = self.flow_control.pending_window_updates.get_mut(&stream_id) {
3521            let old = *existing;
3522            *existing = existing.saturating_add(increment).min(max_increment);
3523            trace!(
3524                "{} WINDOW_UPDATE coalesced: stream={} old={} new={}",
3525                log_context!(self),
3526                stream_id,
3527                old,
3528                *existing
3529            );
3530        } else if self.flow_control.pending_window_updates.len() < self.max_pending_window_updates {
3531            self.flow_control
3532                .pending_window_updates
3533                .insert(stream_id, increment.min(max_increment));
3534            trace!(
3535                "{} WINDOW_UPDATE queued: stream={} increment={}",
3536                log_context!(self),
3537                stream_id,
3538                increment.min(max_increment)
3539            );
3540        } else {
3541            error!(
3542                "{} WINDOW_UPDATE dropped: queue full ({} entries), stream={} increment={}",
3543                log_context!(self),
3544                self.max_pending_window_updates,
3545                stream_id,
3546                increment
3547            );
3548            incr!(names::h2::WINDOW_UPDATE_DROPPED);
3549        }
3550        self.readiness.arm_writable();
3551    }
3552
3553    /// Re-enable READABLE if this connection is parked waiting for buffer space
3554    /// and the target stream's buffer now has enough room.
3555    ///
3556    /// This is the cross-readiness counterpart to the same-connection check in
3557    /// `writable()`. When the *other side* of a stream (frontend or backend)
3558    /// drains data via its own `writable()`, it frees buffer space that this
3559    /// connection was waiting for. Without this explicit wake-up the connection
3560    /// stays parked and the session deadlocks until a timeout fires.
3561    ///
3562    /// Returns `true` if READABLE was re-enabled.
3563    pub fn try_resume_reading<L>(&mut self, context: &Context<L>) -> bool
3564    where
3565        L: ListenerHandler + L7ListenerHandler,
3566    {
3567        if let Some((
3568            H2StreamId::Other {
3569                gid: global_stream_id,
3570                ..
3571            },
3572            amount,
3573        )) = self.expect_read
3574        {
3575            let stream = &context.streams[global_stream_id];
3576            let kawa = match self.position {
3577                Position::Client(..) => &stream.back,
3578                Position::Server => &stream.front,
3579            };
3580            if kawa.storage.available_space() >= amount {
3581                self.readiness.interest.insert(Ready::READABLE);
3582                return true;
3583            }
3584        }
3585        false
3586    }
3587
3588    /// Mark a stream's position-appropriate end-of-stream flag.
3589    ///
3590    /// Server reads from the front (client), so sets `front_received_end_of_stream`.
3591    /// Client reads from the back (backend), so sets `back_received_end_of_stream`.
3592    fn mark_end_of_stream(&self, stream: &mut crate::protocol::mux::Stream) {
3593        if self.position.is_server() {
3594            stream.front_received_end_of_stream = true;
3595        } else {
3596            stream.back_received_end_of_stream = true;
3597        }
3598    }
3599
    /// Cancel streams that have been idle longer than [`Self::stream_idle_timeout`].
    ///
    /// A stream is considered idle when no meaningful application data (non-empty
    /// DATA frames or HEADERS) has been received since the last activity timestamp
    /// in [`Self::stream_last_activity_at`].
    ///
    /// Mitigates slow-multiplex Slowloris (Pass 4 Medium #3): the connection-level
    /// idle timer resets on every frame, so a peer sending periodic control frames
    /// can pin `max_concurrent_streams` slots for the full nominal connection timeout.
    /// Per-stream idle deadlines guarantee each stream terminates if it stops making
    /// forward progress, regardless of connection-level liveness.
    ///
    /// Timed-out streams receive RST_STREAM(CANCEL) and are immediately removed
    /// from the streams map so they no longer count against MAX_CONCURRENT_STREAMS.
    /// Backend endpoints are notified and metrics are finalized.
    ///
    /// Independently of any timeout, every call first trims oversized scratch
    /// buffers (see the comment at the top of the body). Streams that already
    /// appear in `rst_sent` are never selected for cancellation.
    pub fn cancel_timed_out_streams<E, L>(&mut self, context: &mut Context<L>, endpoint: &mut E)
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        // Per-connection scratch Vecs (`converter_buf`, `lowercase_buf`,
        // `cookie_buf`, `priorities_buf`) grow to a high-water mark and
        // never shrink on their own. On a long-lived H2 connection that
        // briefly carried a flurry of large headers, the backing memory
        // would otherwise stay pinned indefinitely. Each buffer is checked
        // individually: once its capacity exceeds 4× `SCRATCH_BUF_RETAIN`
        // it is shrunk back towards `SCRATCH_BUF_RETAIN`. This runs
        // unconditionally at the top of every `cancel_timed_out_streams`
        // invocation, before the early returns below, so it also applies
        // when the connection has no live streams.
        const SCRATCH_BUF_RETAIN: usize = 16 * 1024;
        if self.converter_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.converter_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.lowercase_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.lowercase_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.cookie_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.cookie_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.priorities_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.priorities_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }

        // Nothing to time out without streams or activity timestamps.
        if self.streams.is_empty() || self.stream_last_activity_at.is_empty() {
            return;
        }
        let now = Instant::now();
        let deadline = self.stream_idle_timeout;
        // Collect the ids up front so the maps can be mutated while the
        // cancellations are processed. A stream qualifies when it is still
        // registered, has no RST recorded in `rst_sent`, and its last
        // activity is strictly older than the deadline.
        let timed_out: Vec<StreamId> = self
            .stream_last_activity_at
            .iter()
            .filter_map(|(&sid, &t)| {
                (self.streams.contains_key(&sid)
                    && !self.rst_sent.contains(&sid)
                    && now.saturating_duration_since(t) > deadline)
                    .then_some(sid)
            })
            .collect();
        if timed_out.is_empty() {
            return;
        }
        for sid in timed_out {
            info!(
                "{} H2 stream {} idle > {:?}, cancelling (slow-multiplex guard)",
                log_context!(self),
                sid,
                deadline
            );
            // Route through the canonical chokepoint so dedupe (rst_sent),
            // queued-cap accounting (MAX_PENDING_RST_STREAMS via
            // total_rst_streams_queued), and edge-triggered-epoll arming
            // (Readiness::arm_writable) all stay consistent — see LIFECYCLE
            // §8.2. The previous direct push bypassed all three: a peer
            // that opens 200 streams and lets them all idle past
            // stream_idle_timeout could push past the queued cap silently
            // (no GOAWAY(ENHANCE_YOUR_CALM) escalation), a double-cancel
            // pass would grow pending_rst_streams instead of short-
            // circuiting on the existing rst_sent membership, and the
            // hand-rolled `interest.insert(WRITABLE) + signal_pending_write`
            // pair that used to follow this loop skipped invariant 15.
            // Counting these RSTs against the cap is a deliberate behaviour
            // change: 200 cumulative idle cancellations from one peer IS
            // abusive (pinning MAX_CONCURRENT_STREAMS slots), and the
            // GOAWAY(ENHANCE_YOUR_CALM) escalation tells the peer to
            // reconnect with a clean state.
            //
            // We deliberately ignore the `Option<MuxResult>` flood-violation
            // signal here — `cancel_timed_out_streams` returns `()` and is
            // called as best-effort housekeeping during the read path. A
            // flood violation that becomes visible mid-iteration will be
            // re-detected on the next `record_rst_emitted` call (the
            // counter is sticky), so dropping the early-return is safe.
            let _ = self.enqueue_rst(sid, H2Error::Cancel);

            // Remove from streams map and recycle the context stream so the slot
            // no longer counts against MAX_CONCURRENT_STREAMS.
            // Compute totals per-stream before remove (matches RST_STREAM handler).
            let byte_totals = self.compute_stream_byte_totals(context);
            if let Some(global_stream_id) = self.streams.get(&sid).copied() {
                {
                    // Scoped so the mutable stream borrow ends before
                    // `context` is used again below.
                    let stream = &mut context.streams[global_stream_id];
                    self.attribute_bytes_to_stream(&mut stream.metrics);
                }
                // Check if stream is linked to a backend — borrow must be scoped
                // so end_stream can take &mut context.
                let linked_token = context.streams[global_stream_id].linked_token();
                // RTTs are sampled before `end_stream` is called on the endpoint.
                let (client_rtt, server_rtt) =
                    Self::snapshot_rtts(&self.position, &self.socket, &*endpoint, linked_token);
                if let Some(token) = linked_token {
                    endpoint.end_stream(token, global_stream_id, context);
                }
                let stream = &mut context.streams[global_stream_id];
                match &self.position {
                    // Connected backend: release one active-request slot
                    // (saturating, so the counter cannot underflow).
                    Position::Client(_, backend, BackendStatus::Connected) => {
                        let mut backend_borrow = backend.borrow_mut();
                        backend_borrow.active_requests =
                            backend_borrow.active_requests.saturating_sub(1);
                    }
                    Position::Client(..) => {}
                    // Frontend: finalize metrics, emit the access log tagged
                    // "H2::IdleTimeout", and mark the stream for recycling.
                    Position::Server => {
                        self.distribute_overhead(&mut stream.metrics, byte_totals);
                        stream.metrics.backend_stop();
                        stream.generate_access_log(
                            true,
                            Some("H2::IdleTimeout"),
                            context.listener.clone(),
                            client_rtt,
                            server_rtt,
                        );
                        stream.state = StreamState::Recycle;
                    }
                }
                // Retire sid from streams/prioriser/stream_last_activity_at and
                // invalidate expect_write/expect_read if they reference this gid.
                self.remove_dead_stream(sid, global_stream_id);
            }
        }
        // Writable arming is already done by enqueue_rst -> arm_writable in
        // the loop above; the trailing pair was redundant after the chokepoint
        // routing landed.
    }
3742
3743    /// Queue a `RST_STREAM` frame for serialisation by
3744    /// [`Self::flush_pending_control_frames`] on the next writable tick.
3745    ///
3746    /// This is the canonical entry point for proxy-emitted stream resets:
3747    /// `DATA` on a closed stream, `MAX_CONCURRENT_STREAMS` refusal, and the
3748    /// per-stream error paths in [`Self::reset_stream`] all funnel through
3749    /// here. Serialisation is independent of the owning `Stream` still
3750    /// existing in `self.streams`, which is what lets us emit even after a
3751    /// caller has already called [`Self::remove_dead_stream`].
3752    ///
3753    /// Delegates the primitive work to [`enqueue_rst_into`] so the invariants
3754    /// are covered by unit tests that don't need a full `ConnectionH2`
3755    /// fixture. See that function's doc-comment for the three invariants
3756    /// (dedupe via `rst_sent`, MadeYouReset queued cap via
3757    /// `total_rst_streams_queued`, edge-triggered-epoll arm via
3758    /// [`Readiness::arm_writable`]).
3759    fn enqueue_rst(&mut self, wire_stream_id: StreamId, error: H2Error) -> Option<MuxResult> {
3760        let freshly_queued = enqueue_rst_into(
3761            &mut self.pending_rst_streams,
3762            &mut self.total_rst_streams_queued,
3763            &mut self.rst_sent,
3764            &mut self.readiness,
3765            wire_stream_id,
3766            error,
3767        );
3768        // Account ONLY when a new RST actually entered the queue.
3769        // Calling `enqueue_rst` for a stream that already has a queued
3770        // (or already-flushed) RST is the dedup short-circuit — counting
3771        // those would inflate `h2.frames.tx.rst_stream` /
3772        // `h2.rst_stream.sent.*` and trip the CVE-2025-8671 MadeYouReset
3773        // lifetime cap on frames that never reached the wire.
3774        //
3775        // Account at queue-time, not at drain-time. Doing it later in
3776        // `flush_pending_control_frames` would double-count any RST that
3777        // a re-entrant call (DATA on a closed stream we already RSTed)
3778        // tried to enqueue — and missing it at queue-time leaves
3779        // `cancel_timed_out_streams` / `refuse_stream_and_discard` /
3780        // DATA-on-closed-stream paths bypassing the lifetime cap
3781        // (security review LISA-001 on commit `da845c71`).
3782        if freshly_queued {
3783            self.account_emitted_rst(error)
3784        } else {
3785            None
3786        }
3787    }
3788
3789    /// Single accounting site for proxy-emitted RST_STREAM frames.
3790    /// Three things must happen for every emitted RST so flood-protection
3791    /// stays honest: the global tx counter, the per-error breakdown,
3792    /// and the MadeYouReset emitted-RST lifetime cap.
3793    ///
3794    /// Two distinct emission paths feed this helper:
3795    ///   * Queued frames — [`Self::enqueue_rst`] (and therefore every
3796    ///     callable that funnels through it: `reset_stream`,
3797    ///     `refuse_stream_and_discard`, `cancel_timed_out_streams`,
3798    ///     DATA-on-closed-stream) calls this once at queue-time. The
3799    ///     drain in `flush_pending_control_frames` does NOT call it
3800    ///     again — that would double-count.
3801    ///   * Converter-emitted frames — the converter's `initialize`
3802    ///     chokepoint (and the HPACK over-budget abort path) writes
3803    ///     RST_STREAM frames straight into `kawa.out` from inside
3804    ///     `kawa.prepare`. We collect those `H2Error` codes during the
3805    ///     `write_streams` loop and call this helper for each one
3806    ///     after `drop(converter)` (because the converter holds
3807    ///     `&mut self.encoder`).
3808    ///
3809    /// Returning `Some(MuxResult)` means the caller MUST short-circuit
3810    /// with that result — the flood detector tripped its lifetime cap
3811    /// and converted to a connection-wide GOAWAY.
3812    fn account_emitted_rst(&mut self, error: H2Error) -> Option<MuxResult> {
3813        incr!(names::h2::FRAMES_TX_RST_STREAM);
3814        count!(metric_for_rst_stream_sent(error), 1);
3815        if !matches!(error, H2Error::NoError) {
3816            if let Some(violation) = self.flood_detector.record_rst_emitted() {
3817                return Some(self.handle_flood_violation(violation));
3818            }
3819        }
3820        None
3821    }
3822
3823    /// Refuse a newly-opened stream with RST_STREAM and discard its HEADERS payload.
3824    ///
3825    /// Used when MAX_CONCURRENT_STREAMS is exceeded or buffer pool is exhausted.
3826    /// Queues the RST_STREAM for the writable path (can't write to kawa.storage
3827    /// here because it is needed to discard the HEADERS payload).
3828    ///
3829    /// Also applies SETTINGS back-pressure per RFC 9113 §5.1.2: if refusals
3830    /// burst past [`BACKPRESSURE_REFUSAL_THRESHOLD`] within
3831    /// [`BACKPRESSURE_WINDOW_DURATION`], the advertised
3832    /// `SETTINGS_MAX_CONCURRENT_STREAMS` is halved via
3833    /// [`Self::apply_mcs_backpressure`].
3834    fn refuse_stream_and_discard(
3835        &mut self,
3836        stream_id: StreamId,
3837        error: H2Error,
3838        payload_len: u32,
3839    ) -> MuxResult {
3840        if let Some(result) = self.enqueue_rst(stream_id, error) {
3841            return result;
3842        }
3843        self.state = H2State::Discard;
3844        self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
3845        self.record_refusal_for_backpressure();
3846        MuxResult::Continue
3847    }
3848
3849    /// RFC 9113 §5.1.2 SETTINGS back-pressure bookkeeping.
3850    ///
3851    /// Increments the refusal counter for the current back-pressure window
3852    /// and, when the burst threshold is crossed, halves the advertised
3853    /// `SETTINGS_MAX_CONCURRENT_STREAMS`. Further halving attempts in the
3854    /// same connection are suppressed by [`Self::mcs_backpressure_applied`]
3855    /// so sustained abuse does not collapse the cap to zero — callers can
3856    /// still promote the situation to `EnhanceYourCalm` via the flood
3857    /// detector.
3858    fn record_refusal_for_backpressure(&mut self) {
3859        if self.refuse_window_start.elapsed() >= BACKPRESSURE_WINDOW_DURATION {
3860            self.refuse_count_window = 0;
3861            self.refuse_window_start = Instant::now();
3862        }
3863        self.refuse_count_window = self.refuse_count_window.saturating_add(1);
3864        if !self.mcs_backpressure_applied
3865            && self.refuse_count_window >= BACKPRESSURE_REFUSAL_THRESHOLD
3866        {
3867            self.apply_mcs_backpressure();
3868        }
3869    }
3870
3871    /// Halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` and mark the
3872    /// back-pressure state as applied. The new value takes effect locally
3873    /// immediately — subsequent stream-open checks in `handle_header_state`
3874    /// compare `self.streams.len()` against this reduced cap, so the peer
3875    /// starts receiving `REFUSED_STREAM` earlier. A full SETTINGS re-send on
3876    /// the wire is deferred until we have a mid-connection SETTINGS queue
3877    /// (the existing path in `handle_preface_state` only fires during the
3878    /// handshake); this is noted in the task log as a minimal first step.
3879    fn apply_mcs_backpressure(&mut self) {
3880        let previous = self.local_settings.settings_max_concurrent_streams;
3881        let reduced = (previous / 2).max(1);
3882        warn!(
3883            "{} H2 SETTINGS back-pressure: refusals={} in {}s — halving \
3884             SETTINGS_MAX_CONCURRENT_STREAMS {} -> {}",
3885            log_context!(self),
3886            self.refuse_count_window,
3887            BACKPRESSURE_WINDOW_DURATION.as_secs(),
3888            previous,
3889            reduced,
3890        );
3891        self.local_settings.settings_max_concurrent_streams = reduced;
3892        self.mcs_backpressure_applied = true;
3893    }
3894
3895    /// Log a flood violation with full session context and emit the GOAWAY.
3896    ///
3897    /// Centralises the "flood detected" reporting so every site that observes a
3898    /// [`H2FloodViolation`] gets the same session-scoped log line, matching the
3899    /// RUSTLS log-context convention. Also emits the per-kind statsd counter
3900    /// (`h2.flood.violation.<kind>`) so SOC dashboards can window the trip
3901    /// rate without parsing logs — every CVE-mitigation in the H2 family
3902    /// (Rapid Reset, MadeYouReset, CONTINUATION/PING/SETTINGS floods, header
3903    /// overflow, glitch) funnels through this site.
3904    pub fn handle_flood_violation(&mut self, violation: H2FloodViolation) -> MuxResult {
3905        count!(violation.metric_key, 1);
3906        warn!(
3907            "{} H2 flood detected: {} count {} exceeds threshold {}",
3908            log_context!(self),
3909            violation.reason,
3910            violation.count,
3911            violation.threshold,
3912        );
3913        self.goaway(violation.error)
3914    }
3915}
3916
3917/// Recover the [`H2Error`] code that the converter's `initialize`
3918/// chokepoint will encode into the synthesised RST_STREAM frame for a
3919/// kawa stuck in [`kawa::ParsingPhase::Error`]. Mirrors the parse +
3920/// fallback at `lib/src/protocol/mux/converter.rs::initialize` so the
3921/// flood-accounting helper sees the same code that lands on the wire.
3922fn rst_error_from_kawa<T: kawa::AsBuffer>(kawa: &kawa::Kawa<T>) -> H2Error {
3923    match kawa.parsing_phase {
3924        kawa::ParsingPhase::Error {
3925            kind: kawa::ParsingErrorKind::Processing { message },
3926            ..
3927        } => message.parse::<H2Error>().unwrap_or(H2Error::InternalError),
3928        _ => H2Error::InternalError,
3929    }
3930}
3931
/// Compile-time mapping from `(prefix, H2Error)` to a static metric key.
///
/// Each key is built with `concat!`, so it is a `&'static str` literal that
/// never goes through a heap allocation and can be stored by the statsd
/// drain as `&'static str`. The expansion is an exhaustive `match` over
/// `H2Error` with no wildcard arm: adding a new `H2Error` variant fails the
/// build here, which keeps the metric breakdown in lock-step with the
/// RFC 9113 §7 codes.
///
/// Used for the per-error-code counters emitted around GOAWAY and RST_STREAM
/// in either direction (see `metric_for_goaway_sent` etc. below).
///
/// The `@table` rule is an implementation detail: it turns a list of
/// `Variant => ".suffix"` pairs into the `match`. Call sites only use the
/// two-argument form.
macro_rules! h2_error_metric_key {
    (@table $prefix:literal, $error:expr, $($variant:ident => $suffix:literal),+ $(,)?) => {
        match $error {
            $(H2Error::$variant => concat!($prefix, $suffix),)+
        }
    };
    ($prefix:literal, $error:expr) => {
        h2_error_metric_key!(
            @table $prefix, $error,
            NoError => ".no_error",
            ProtocolError => ".protocol_error",
            InternalError => ".internal_error",
            FlowControlError => ".flow_control_error",
            SettingsTimeout => ".settings_timeout",
            StreamClosed => ".stream_closed",
            FrameSizeError => ".frame_size_error",
            RefusedStream => ".refused_stream",
            Cancel => ".cancel",
            CompressionError => ".compression_error",
            ConnectError => ".connect_error",
            EnhanceYourCalm => ".enhance_your_calm",
            InadequateSecurity => ".inadequate_security",
            HTTP11Required => ".http_1_1_required",
        )
    };
}
3961
/// Static metric key for an outbound GOAWAY carrying `error`.
///
/// Expands [`h2_error_metric_key!`] under the `h2.goaway.sent` prefix, e.g.
/// `h2.goaway.sent.no_error`. Same call shape as the other three helpers
/// below — keeps the call sites uniform.
fn metric_for_goaway_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.goaway.sent", error)
}
3967
3968/// Static metric key for an inbound GOAWAY by raw wire error code. Codes
3969/// outside RFC 9113 §7 fall into the dedicated `…unknown_error` bucket so the
3970/// breakdown stays bounded and operators can still spot non-standard peers.
3971fn metric_for_goaway_received(error_code: u32) -> &'static str {
3972    H2Error::try_from(error_code)
3973        .map(|e| h2_error_metric_key!("h2.goaway.received", e))
3974        .unwrap_or("h2.goaway.received.unknown_error")
3975}
3976
/// Static metric key for an outbound RST_STREAM carrying `error`.
///
/// Expands [`h2_error_metric_key!`] under the `h2.rst_stream.sent` prefix.
/// Mirrors [`metric_for_goaway_sent`] under a separate namespace so RST and
/// GOAWAY rates can be alerted on independently.
fn metric_for_rst_stream_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.rst_stream.sent", error)
}
3983
3984/// Static metric key for an inbound RST_STREAM by raw wire error code. Same
3985/// `…unknown_error` fallback as [`metric_for_goaway_received`].
3986fn metric_for_rst_stream_received(error_code: u32) -> &'static str {
3987    H2Error::try_from(error_code)
3988        .map(|e| h2_error_metric_key!("h2.rst_stream.received", e))
3989        .unwrap_or("h2.rst_stream.received.unknown_error")
3990}
3991
3992/// Static metric key for an inbound H2 frame by RFC 9113 §6 frame type.
3993/// Emitted at the `handle_frame` dispatch — single chokepoint that any
3994/// new H2 frame type must traverse, so adding a `Frame::*` variant fails
3995/// the build here. Counts are per-frame, not per-byte; pair with
3996/// `bytes_in` for traffic-mix dashboards.
3997fn h2_frame_rx_metric_key(frame: &Frame) -> &'static str {
3998    match frame {
3999        Frame::Data(_) => "h2.frames.rx.data",
4000        Frame::Headers(_) => "h2.frames.rx.headers",
4001        Frame::PushPromise(_) => "h2.frames.rx.push_promise",
4002        Frame::Priority(_) => "h2.frames.rx.priority",
4003        Frame::RstStream(_) => "h2.frames.rx.rst_stream",
4004        Frame::Settings(_) => "h2.frames.rx.settings",
4005        Frame::Ping(_) => "h2.frames.rx.ping",
4006        Frame::GoAway(_) => "h2.frames.rx.goaway",
4007        Frame::WindowUpdate(_) => "h2.frames.rx.window_update",
4008        Frame::Continuation(_) => "h2.frames.rx.continuation",
4009        Frame::PriorityUpdate(_) => "h2.frames.rx.priority_update",
4010        Frame::Unknown(_) => "h2.frames.rx.unknown",
4011    }
4012}
4013
4014impl<Front: SocketHandler> ConnectionH2<Front> {
4015    pub fn goaway(&mut self, error: H2Error) -> MuxResult {
4016        self.state = H2State::Error;
4017        self.drain.draining = true;
4018        self.expect_read = None;
4019        // Disarm the SETTINGS ACK timer: once we've committed to GOAWAY, the
4020        // timeout check at `readable()` / `flush_pending_control_frames()` must
4021        // not re-fire. Without this, `signal_pending_write()` below re-enters
4022        // `writable()` → `flush_pending_control_frames()` on the next tick,
4023        // the elapsed check is still true, and we emit another
4024        // `warn!` + `goaway()` pair, each bumping `h2.goaway.sent.*`.
4025        self.settings_sent_at = None;
4026        let kawa = &mut self.zero;
4027        kawa.storage.clear();
4028        // Severity tiering: only `InternalError` implies a sozu-side bug when
4029        // WE emit it. Every other non-`NoError` reason is "peer misbehaved,
4030        // sozu defended correctly" — operators don't need paging on abusive
4031        // or buggy peers. Caller sites already log the specific antecedent
4032        // (flood detected, parser failure, SETTINGS timeout, invalid window)
4033        // before reaching `goaway()`, so demoting this summary line avoids
4034        // duplicate noise without hiding the root cause.
4035        match error {
4036            H2Error::NoError => debug!("{} GOAWAY: {:?}", log_context!(self), error),
4037            H2Error::InternalError => error!("{} GOAWAY: {:?}", log_context!(self), error),
4038            _ => warn!("{} GOAWAY: {:?}", log_context!(self), error),
4039        }
4040        count!(metric_for_goaway_sent(error), 1);
4041
4042        // RFC 9113 §6.8: last_stream_id is the highest peer-initiated stream we processed
4043        match serializer::gen_goaway(kawa.storage.space(), self.highest_peer_stream_id, error) {
4044            Ok((_, size)) => {
4045                kawa.storage.fill(size);
4046                incr!(names::h2::FRAMES_TX_GOAWAY);
4047                self.state = H2State::GoAway;
4048                self.expect_write = Some(H2StreamId::Zero);
4049                self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
4050                self.readiness.signal_pending_write();
4051                MuxResult::Continue
4052            }
4053            Err(error) => {
4054                error!(
4055                    "{} Could not serialize GoAwayFrame: {:?}",
4056                    log_context!(self),
4057                    error
4058                );
4059                self.force_disconnect()
4060            }
4061        }
4062    }
4063
4064    /// RFC 9113 §6.8: Initiate graceful shutdown using the double-GOAWAY pattern.
4065    ///
4066    /// First call sends GOAWAY with `last_stream_id = 0x7FFFFFFF` (MAX) to signal
4067    /// the intent to stop accepting new streams while allowing in-flight streams
4068    /// to complete. The connection enters draining mode.
4069    ///
4070    /// When `draining` is already true (second invocation), sends the final GOAWAY
4071    /// with the actual `highest_peer_stream_id` so the peer knows which streams
4072    /// were processed.
4073    pub fn graceful_goaway(&mut self) -> MuxResult {
4074        if self.drain.draining {
4075            // Second GOAWAY: send with the real last_stream_id
4076            return self.goaway(H2Error::NoError);
4077        }
4078
4079        // First GOAWAY: advertise MAX stream ID so the peer knows we are draining
4080        // but does not yet know the cutoff. This gives in-flight requests a chance
4081        // to arrive before we commit to a final last_stream_id.
4082        self.drain.draining = true;
4083        // Arm the forced-close timer from the moment the proxy decides to drain.
4084        // `Mux::shutting_down` samples it against `graceful_shutdown_deadline`
4085        // and returns `true` once the budget is exhausted so the session loop
4086        // tears the connection down instead of waiting forever.
4087        self.drain.started_at = Some(Instant::now());
4088        // Keep expect_read as-is: existing streams should continue reading
4089        // data during the drain window opened by the initial GOAWAY. Only
4090        // the final GOAWAY (via `goaway()`) removes READABLE.
4091        let kawa = &mut self.zero;
4092        kawa.storage.clear();
4093        debug!(
4094            "{} GOAWAY (graceful, initial): last_stream_id=0x7FFFFFFF",
4095            log_context!(self)
4096        );
4097        // The initial GOAWAY sends NO_ERROR on the wire — count it under
4098        // the same per-code key as the final GOAWAY. The downstream alert
4099        // that wants to distinguish drain from termination compares
4100        // against the `h2.goaway.sent.no_error` rate (drain) vs the other
4101        // variants (termination on error).
4102        count!(metric_for_goaway_sent(H2Error::NoError), 1);
4103
4104        match serializer::gen_goaway(kawa.storage.space(), STREAM_ID_MAX, H2Error::NoError) {
4105            Ok((_, size)) => {
4106                kawa.storage.fill(size);
4107                incr!(names::h2::FRAMES_TX_GOAWAY);
4108                // Stay in the current state so the connection can continue processing
4109                // existing streams. The final GOAWAY will transition to GoAway state.
4110                // Keep READABLE so in-flight request bodies can still be received
4111                // during the drain window. Only remove READABLE in the final GOAWAY
4112                // (via `goaway()`).
4113                self.expect_write = Some(H2StreamId::Zero);
4114                self.readiness.arm_writable();
4115                MuxResult::Continue
4116            }
4117            Err(error) => {
4118                error!(
4119                    "{} Could not serialize graceful GoAwayFrame: {:?}",
4120                    log_context!(self),
4121                    error
4122                );
4123                self.force_disconnect()
4124            }
4125        }
4126    }
4127
4128    /// Returns `true` when the graceful-shutdown budget armed by
4129    /// [`Self::graceful_goaway`] has elapsed. A return of `true` signals
4130    /// the enclosing session loop that the proxy-initiated drain must
4131    /// transition to a forced close: remaining streams will not complete
4132    /// in time and keeping the connection open past the deadline defeats
4133    /// the soft-stop SLA.
4134    ///
4135    /// Returns `false` when:
4136    /// - drain has not started yet (`started_at` is `None`),
4137    /// - the knob is `0` / `None` (indefinite wait explicitly opted in),
4138    /// - or the elapsed time is still within the configured budget.
4139    pub fn graceful_shutdown_deadline_elapsed(&self) -> bool {
4140        match (self.drain.started_at, self.drain.graceful_shutdown_deadline) {
4141            (Some(started_at), Some(deadline)) => started_at.elapsed() >= deadline,
4142            _ => false,
4143        }
4144    }
4145
4146    /// Returns `true` if there is data queued waiting to be flushed:
4147    /// - H2 control frames in the zero buffer (GOAWAY, SETTINGS ACK, etc.)
4148    /// - A partially-written stream or control frame (`expect_write`)
4149    /// - Encrypted TLS records in rustls's output buffer not yet flushed to TCP
4150    ///
4151    /// The TLS check is critical: `shutting_down()` uses this to prevent
4152    /// premature session close while response DATA is still in rustls's
4153    /// buffer (accepted by `socket_write_vectored` but not yet on the wire).
4154    ///
4155    /// Does NOT check per-stream `back.out`/`back.blocks`; use
4156    /// [`Self::has_pending_write_full`] on paths that must honour
4157    /// LIFECYCLE invariant 16 (e.g. shutdown-drain).
4158    pub fn has_pending_write(&self) -> bool {
4159        if self.peer_gone_after_final_goaway() {
4160            return false;
4161        }
4162        self.expect_write.is_some()
4163            || !self.zero.storage.is_empty()
4164            || self.socket.socket_wants_write()
4165    }
4166
4167    /// Connection-level [`Self::has_pending_write`] extended with a per-stream
4168    /// back-buffer probe (LIFECYCLE §9 invariant 16). Used by shutdown-drain
4169    /// paths that must not close while any open stream still has outbound
4170    /// kawa bytes queued — a voluntary scheduler yield can leave `back.out`
4171    /// or `back.blocks` non-empty without `expect_write` being set.
4172    pub fn has_pending_write_full<L>(&self, context: &Context<L>) -> bool
4173    where
4174        L: ListenerHandler + L7ListenerHandler,
4175    {
4176        self.has_pending_write() || any_stream_has_pending_back(&self.streams, &context.streams)
4177    }
4178
4179    /// Flush the zero buffer to the socket, counting bytes as connection overhead.
4180    ///
4181    /// Returns `true` if the socket stalled (WouldBlock / zero-length write),
4182    /// meaning the caller should stop writing and wait for the next writable event.
4183    /// Returns `false` when the buffer has been fully drained.
4184    fn flush_zero_to_socket(&mut self) -> bool {
4185        while !self.zero.storage.is_empty() {
4186            let (size, status) = self.socket.socket_write(self.zero.storage.data());
4187            #[cfg(debug_assertions)]
4188            trace!(
4189                "{} flush_zero_to_socket: written={}, status={:?}, wants_write={}",
4190                log_context!(self),
4191                size,
4192                status,
4193                self.socket.socket_wants_write()
4194            );
4195            self.zero.storage.consume(size);
4196            self.position.count_bytes_out_counter(size);
4197            self.bytes.overhead_bout += size;
4198            if update_readiness_after_write(size, status, &mut self.readiness) {
4199                return true;
4200            }
4201        }
4202        // Reset buffer positions after draining. consume() advances start but
4203        // never resets it, so without clear() the next fill would panic.
4204        self.zero.storage.clear();
4205        false
4206    }
4207
4208    /// Directly flush the zero buffer to the socket without going through
4209    /// the full writable() path. Used during shutdown when the event loop
4210    /// won't deliver new epoll events for this session (edge-triggered).
4211    pub fn flush_zero_buffer(&mut self) {
4212        if self.flush_zero_to_socket() {
4213            return;
4214        }
4215        self.expect_write = None;
4216        if self.socket.socket_wants_write() {
4217            let (_size, status) = self.socket.socket_write(&[]);
4218            let _ = update_readiness_after_write(0, status, &mut self.readiness);
4219        }
4220    }
4221
4222    pub fn create_stream<L>(
4223        &mut self,
4224        stream_id: StreamId,
4225        context: &mut Context<L>,
4226    ) -> Option<GlobalStreamId>
4227    where
4228        L: ListenerHandler + L7ListenerHandler,
4229    {
4230        // RFC 9113 §6.8: reject new streams on a draining connection
4231        if self.drain.draining {
4232            error!(
4233                "{} Rejecting new stream {} on draining connection",
4234                log_context!(self),
4235                stream_id
4236            );
4237            return None;
4238        }
4239        // Track the highest peer-initiated stream ID for GoAway frames
4240        // before any early return, so GoAway always reports the correct last stream.
4241        if stream_id > self.highest_peer_stream_id {
4242            self.highest_peer_stream_id = stream_id;
4243        }
4244        let global_stream_id = context.create_stream(
4245            Ulid::generate(),
4246            self.peer_settings.settings_initial_window_size,
4247        )?;
4248        self.last_stream_id = (stream_id + 2) & !1;
4249        self.streams.insert(stream_id, global_stream_id);
4250        self.stream_last_activity_at
4251            .insert(stream_id, Instant::now());
4252        Some(global_stream_id)
4253    }
4254
4255    pub fn new_stream_id(&mut self) -> Option<StreamId> {
4256        let (issued, next) = next_stream_id(self.last_stream_id, self.position.is_client())?;
4257        self.last_stream_id = next;
4258        Some(issued)
4259    }
4260
    /// Test-only setter: jump `last_stream_id` close to [`STREAM_ID_MAX`] so
    /// that the next call to [`Self::new_stream_id`] exhausts the 31-bit
    /// space. FIX-22 ("Stream-ID exhaustion disconnects backend gracefully")
    /// exercises the `None`-return branch — reaching it through normal API
    /// usage would require issuing ~2³¹ requests, which is not tractable in
    /// an E2E harness.
    ///
    /// Only compiled for unit tests or when the `e2e-hooks` feature is
    /// enabled. `id` is stored verbatim; no validation is performed here.
    #[cfg(any(test, feature = "e2e-hooks"))]
    pub fn __test_set_last_stream_id(&mut self, id: StreamId) {
        self.last_stream_id = id;
    }
4271
4272    fn handle_frame<E, L>(
4273        &mut self,
4274        frame: Frame,
4275        wire_payload_len: u32,
4276        context: &mut Context<L>,
4277        endpoint: E,
4278    ) -> MuxResult
4279    where
4280        E: Endpoint,
4281        L: ListenerHandler + L7ListenerHandler,
4282    {
4283        trace!("{} {:#?}", log_context!(self), frame);
4284        // Per-frame-type RX counter. Single chokepoint covers every H2 frame
4285        // type — adding a new `Frame::*` variant fails the build inside the
4286        // helper, keeping the metric breakdown in lock-step with RFC 9113 §6.
4287        count!(h2_frame_rx_metric_key(&frame), 1);
4288        match frame {
4289            Frame::Data(data) => self.handle_data_frame(data, wire_payload_len, context, endpoint),
4290            Frame::Headers(headers) => self.handle_headers_frame(headers, context, endpoint),
4291            Frame::PushPromise(_) => self.handle_push_promise_frame(),
4292            Frame::Priority(priority) => self.handle_priority_frame(priority, context, endpoint),
4293            Frame::RstStream(rst_stream) => {
4294                self.handle_rst_stream_frame(rst_stream, context, endpoint)
4295            }
4296            Frame::Settings(settings) => self.handle_settings_frame(settings, context),
4297            Frame::Ping(ping) => self.handle_ping_frame(ping),
4298            Frame::GoAway(goaway) => self.handle_goaway_frame(goaway, context, endpoint),
4299            Frame::WindowUpdate(wu) => self.handle_window_update_frame(wu, context, endpoint),
4300            Frame::PriorityUpdate(pu) => self.handle_priority_update_frame(pu),
4301            Frame::Continuation(_) => {
4302                // Unreachable: standalone CONTINUATION is rejected in
4303                // `handle_header_state` (RFC 9113 §6.10) and in-block
4304                // CONTINUATION is consumed by the inline header-parsing
4305                // path. Keep a defensive fallback that returns
4306                // PROTOCOL_ERROR rather than panicking in debug builds.
4307                self.attribute_bytes_to_overhead();
4308                warn!(
4309                    "{} CONTINUATION frames are handled inline during header parsing",
4310                    log_context!(self)
4311                );
4312                self.goaway(H2Error::ProtocolError)
4313            }
4314            // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
4315            // The parser already consumed the payload; attribute the bytes
4316            // to connection-level overhead and continue.
4317            Frame::Unknown(raw) => {
4318                debug!(
4319                    "{} Ignoring unknown H2 frame type {}",
4320                    log_context!(self),
4321                    raw
4322                );
4323                self.attribute_bytes_to_overhead();
4324                MuxResult::Continue
4325            }
4326        }
4327    }
4328
4329    /// RFC 9110 §8.6: Content-Length validation must be skipped for responses
4330    /// where the body is absent by definition:
4331    /// - Responses to HEAD requests (any status)
4332    /// - 1xx informational responses
4333    /// - 204 No Content
4334    /// - 304 Not Modified
4335    fn content_length_exempt(
4336        &self,
4337        context: &crate::protocol::kawa_h1::editor::HttpContext,
4338    ) -> bool {
4339        use crate::protocol::kawa_h1::parser::Method;
4340        // HEAD method responses (only relevant when reading backend responses)
4341        if self.position.is_client() && context.method == Some(Method::Head) {
4342            return true;
4343        }
4344        // 1xx, 204, 304 status codes
4345        if let Some(status) = context.status {
4346            if (100..200).contains(&status) || status == 204 || status == 304 {
4347                return true;
4348            }
4349        }
4350        false
4351    }
4352
4353    fn handle_data_frame<E, L>(
4354        &mut self,
4355        data: parser::Data,
4356        wire_payload_len: u32,
4357        context: &mut Context<L>,
4358        mut endpoint: E,
4359    ) -> MuxResult
4360    where
4361        E: Endpoint,
4362        L: ListenerHandler + L7ListenerHandler,
4363    {
4364        // CVE-2019-9518: track empty DATA frames (no payload, no END_STREAM)
4365        if data.payload.is_empty() && !data.end_stream {
4366            self.flood_detector.empty_data_count += 1;
4367            check_flood_or_return!(self);
4368        }
4369        let Some(global_stream_id) = self.streams.get(&data.stream_id).copied() else {
4370            // The stream was terminated while data was expected,
4371            // probably due to automatic answer for invalid/unauthorized access.
4372            // RFC 9113 §6.9: we MUST still account for the DATA payload in
4373            // connection-level flow control using the full wire length
4374            // (including pad-length byte and padding), otherwise the window
4375            // shrinks permanently and eventually stalls the connection.
4376            self.flow_control.received_bytes_since_update += wire_payload_len;
4377            let conn_threshold = self.connection_config.initial_connection_window / 2;
4378            if self.flow_control.received_bytes_since_update >= conn_threshold {
4379                let increment = self.flow_control.received_bytes_since_update;
4380                self.queue_window_update(0, increment);
4381                self.flow_control.received_bytes_since_update = 0;
4382                self.readiness.arm_writable();
4383            }
4384            self.attribute_bytes_to_overhead();
4385            return MuxResult::Continue;
4386        };
4387        let mut slice = data.payload;
4388        let stream = &mut context.streams[global_stream_id];
4389        // Unpadded application payload size — what is forwarded to the backend
4390        // and counted against Content-Length.
4391        let content_len = slice.len();
4392        // Full wire-payload size (includes pad-length byte and padding).
4393        // RFC 9113 §5.2: padding counts against flow-control windows.
4394        let wire_len = wire_payload_len as usize;
4395        let cl_exempt = self.content_length_exempt(&stream.context);
4396
4397        // Extract declared content-length and update position-aware data counter
4398        let (data_received, declared_length) = {
4399            let parts = stream.split(&self.position);
4400            *parts.data_received += content_len;
4401            let total = *parts.data_received;
4402            let declared = match parts.rbuffer.body_size {
4403                kawa::BodySize::Length(n) => Some(n),
4404                _ => None,
4405            };
4406            (total, declared)
4407        };
4408
4409        // RFC 9113 §6.9 + §5.2: credit connection-level flow control BEFORE any
4410        // early-return path. Malformed DATA still consumed the peer's send
4411        // window; without crediting it back, repeated bad streams permanently
4412        // shrink the connection window and stall unrelated streams that share
4413        // the same H2 connection. Stream-level credit can stay below — once we
4414        // RST the violating stream, its per-stream window is moot per
4415        // RFC 9113 §6.9 (the receiver discards further frames for the stream).
4416        let conn_threshold = self.connection_config.initial_connection_window / 2;
4417        self.flow_control.received_bytes_since_update += wire_payload_len;
4418        if self.flow_control.received_bytes_since_update >= conn_threshold {
4419            let increment = self.flow_control.received_bytes_since_update;
4420            self.queue_window_update(0, increment);
4421            self.flow_control.received_bytes_since_update = 0;
4422        }
4423
4424        // RFC 9113 §8.1.1: if Content-Length is present, total DATA payload
4425        // must not exceed the declared length (check on every frame).
4426        // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses (body absent by definition).
4427        if !cl_exempt {
4428            if let Some(expected) = declared_length {
4429                if data_received > expected {
4430                    error!(
4431                        "{} Content-Length mismatch: received {} > declared {}",
4432                        log_context!(self),
4433                        data_received,
4434                        expected
4435                    );
4436                    // Pair WRITABLE arming with the queued connection-level
4437                    // WINDOW_UPDATE before returning; otherwise the credit sits
4438                    // until the next inbound frame on this connection.
4439                    if !self.flow_control.pending_window_updates.is_empty() {
4440                        self.readiness.arm_writable();
4441                    }
4442                    let result = self.reset_stream(
4443                        data.stream_id,
4444                        global_stream_id,
4445                        context,
4446                        endpoint,
4447                        H2Error::ProtocolError,
4448                    );
4449                    self.remove_dead_stream(data.stream_id, global_stream_id);
4450                    return result;
4451                }
4452            }
4453        }
4454
4455        let stream = &mut context.streams[global_stream_id];
4456        self.attribute_bytes_to_stream(&mut stream.metrics);
4457        let stream_state = stream.state;
4458        let is_unlinked = matches!(stream_state, StreamState::Unlinked);
4459        let parts = stream.split(&self.position);
4460        let kawa = parts.rbuffer;
4461        self.position.count_bytes_in(parts.metrics, content_len);
4462
4463        // Stream-level flow control (only if stream is still open).
4464        // Connection-level credit was already applied above the CL check so
4465        // malformed DATA cannot starve the connection window for other streams.
4466        if !data.end_stream {
4467            self.queue_window_update(data.stream_id, wire_payload_len);
4468        }
4469
4470        // If we have pending updates, ensure we get a writable event.
4471        // Must use signal_pending_write() — not just interest.insert() — because
4472        // under edge-triggered epoll the WRITABLE event bit may have been consumed
4473        // by a previous write cycle. Without the event bit set, filter_interest()
4474        // returns 0 and the WINDOW_UPDATEs never get flushed, stalling the client.
4475        if !self.flow_control.pending_window_updates.is_empty() {
4476            self.readiness.arm_writable();
4477        }
4478
4479        // Refresh per-stream idle timer on non-empty DATA.
4480        // Empty DATA frames (CVE-2019-9518 vector) must NOT reset the timer,
4481        // otherwise an attacker can keep a stream alive indefinitely with
4482        // zero-length frames while pinning a MAX_CONCURRENT_STREAMS slot.
4483        if content_len > 0 {
4484            if let Some(t) = self.stream_last_activity_at.get_mut(&data.stream_id) {
4485                *t = Instant::now();
4486            }
4487        }
4488
4489        if is_unlinked {
4490            // Backend is gone but client is still sending DATA.
4491            // Discard the data (flow control updates were already
4492            // queued above) to prevent the buffer from filling up.
4493            kawa.storage.clear();
4494            if data.end_stream {
4495                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
4496                self.mark_end_of_stream(stream);
4497            }
4498        } else {
4499            // Advance storage.head by the full wire payload length so the
4500            // next frame doesn't read stale pad-length+padding bytes.
4501            slice.start = slice.start.saturating_add(kawa.storage.head as u32);
4502            kawa.storage.head += wire_len;
4503
4504            // Emit chunk framing for chunked transfer encoding (H2→H1 path).
4505            // H2 converter ignores ChunkHeader and end_chunk Flags, so this is safe for H2→H2.
4506            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
4507                let hex_len = {
4508                    let mut buf = Vec::with_capacity(16);
4509                    let _ = write!(buf, "{content_len:x}");
4510                    buf
4511                };
4512                kawa.push_block(kawa::Block::ChunkHeader(kawa::ChunkHeader {
4513                    length: kawa::Store::from_vec(hex_len),
4514                }));
4515            }
4516
4517            kawa.push_block(kawa::Block::Chunk(kawa::Chunk {
4518                data: kawa::Store::Slice(slice),
4519            }));
4520
4521            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
4522                kawa.push_block(kawa::Block::Flags(kawa::Flags {
4523                    end_body: false,
4524                    end_chunk: true,
4525                    end_header: false,
4526                    end_stream: false,
4527                }));
4528            }
4529
4530            if data.end_stream {
4531                // RFC 9113 §8.1.1: on end_stream, total DATA must equal Content-Length.
4532                // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
4533                if !cl_exempt {
4534                    if let Some(expected) = declared_length {
4535                        if data_received != expected {
4536                            error!(
4537                                "{} Content-Length mismatch: received {} != declared {}",
4538                                log_context!(self),
4539                                data_received,
4540                                expected
4541                            );
4542                            let result = self.reset_stream(
4543                                data.stream_id,
4544                                global_stream_id,
4545                                context,
4546                                endpoint,
4547                                H2Error::ProtocolError,
4548                            );
4549                            self.remove_dead_stream(data.stream_id, global_stream_id);
4550                            return result;
4551                        }
4552                    }
4553                }
4554                let is_chunked = kawa.body_size == kawa::BodySize::Chunked;
4555                kawa.push_block(kawa::Block::Flags(kawa::Flags {
4556                    end_body: true,
4557                    end_chunk: is_chunked,
4558                    end_header: false,
4559                    end_stream: true,
4560                }));
4561                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
4562                self.mark_end_of_stream(stream);
4563            }
4564            if let StreamState::Linked(token) = stream_state {
4565                // Mirror of h1.rs:361-368 for the H2-backend → H2-frontend
4566                // path: edge-triggered epoll will NOT re-fire for bytes we
4567                // just pushed into stream.back; the synthetic event is the
4568                // only wake path. LIFECYCLE invariant 15.
4569                endpoint.readiness_mut(token).arm_writable();
4570                incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_DATA);
4571            }
4572        }
4573        MuxResult::Continue
4574    }
4575
4576    fn handle_headers_frame<E, L>(
4577        &mut self,
4578        headers: Headers,
4579        context: &mut Context<L>,
4580        mut endpoint: E,
4581    ) -> MuxResult
4582    where
4583        E: Endpoint,
4584        L: ListenerHandler + L7ListenerHandler,
4585    {
4586        // HEADERS frames represent real application activity (new request
4587        // or response). Reset the timeout since the peer is actively
4588        // communicating, unlike control frames (PING, WINDOW_UPDATE).
4589        self.timeout_container.reset();
4590        if !headers.end_headers {
4591            // CVE-2024-27316: only initialize tracking on the very first HEADERS
4592            // fragment, not on re-entries from ContinuationFrame (which call
4593            // handle_frame(Frame::Headers) with the accumulated header block).
4594            if self.flood_detector.continuation_count == 0 {
4595                self.flood_detector.accumulated_header_size = headers.header_block_fragment.len;
4596            }
4597            debug!(
4598                "{} FRAGMENT: stream_id={}, len={}",
4599                log_context!(self),
4600                headers.stream_id,
4601                self.zero.storage.data().len()
4602            );
4603            self.state = H2State::ContinuationHeader(headers);
4604            return MuxResult::Continue;
4605        }
4606        // Header block is complete — reset CONTINUATION counters
4607        self.flood_detector.reset_continuation();
4608        // can this fail?
4609        let stream_id = headers.stream_id;
4610        let Some(global_stream_id) = self.streams.get(&stream_id).copied() else {
4611            error!(
4612                "{} Handling Headers frame with no attached stream {:#?}",
4613                log_context!(self),
4614                self
4615            );
4616            incr!(names::h2::HEADERS_NO_STREAM_ERROR);
4617            self.attribute_bytes_to_overhead();
4618            return self.force_disconnect();
4619        };
4620
4621        // Refresh per-stream idle timer on HEADERS (response headers or trailers
4622        // on an existing stream). Initial HEADERS that create the stream already
4623        // set the timestamp in create_stream().
4624        if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
4625            *t = Instant::now();
4626        }
4627
4628        if let Some(priority) = &headers.priority {
4629            if self.prioriser.push_priority(stream_id, priority.clone()) {
4630                self.reset_stream(
4631                    stream_id,
4632                    global_stream_id,
4633                    context,
4634                    endpoint,
4635                    H2Error::ProtocolError,
4636                );
4637                self.remove_dead_stream(stream_id, global_stream_id);
4638                return MuxResult::Continue;
4639            }
4640        }
4641
4642        let stream = &mut context.streams[global_stream_id];
4643        self.attribute_bytes_to_stream(&mut stream.metrics);
4644        let kawa = &mut self.zero;
4645        let buffer = headers.header_block_fragment.data(kawa.storage.buffer());
4646        let stream = &mut context.streams[global_stream_id];
4647        let parts = &mut stream.split(&self.position);
4648        let was_initial = parts.rbuffer.is_initial();
4649        let elide_x_real_ip = parts.context.elide_x_real_ip;
4650        let status = pkawa::handle_header(
4651            &mut self.decoder,
4652            &mut self.prioriser,
4653            stream_id,
4654            parts.rbuffer,
4655            buffer,
4656            headers.end_stream,
4657            parts.context,
4658            self.flood_detector.config.max_header_list_size,
4659            elide_x_real_ip,
4660        );
4661        kawa.storage.clear();
4662        if let Err((error, global)) = status {
4663            match self.position {
4664                Position::Client(..) => incr!(names::http::BACKEND_PARSE_ERRORS),
4665                Position::Server => incr!(names::http::FRONTEND_PARSE_ERRORS),
4666            }
4667            if global {
4668                error!(
4669                    "{} GOT GLOBAL ERROR WHILE PROCESSING HEADERS",
4670                    log_context!(self)
4671                );
4672                return self.goaway(error);
4673            } else {
4674                let result =
4675                    self.reset_stream(stream_id, global_stream_id, context, endpoint, error);
4676                self.remove_dead_stream(stream_id, global_stream_id);
4677                return result;
4678            }
4679        }
4680        if headers.end_stream {
4681            // RFC 9113 §8.1.1: when END_STREAM arrives via trailers,
4682            // validate that total DATA received matches Content-Length.
4683            // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
4684            if !was_initial && !self.content_length_exempt(&stream.context) {
4685                let parts = stream.split(&self.position);
4686                if let kawa::BodySize::Length(expected) = parts.rbuffer.body_size {
4687                    if *parts.data_received != expected {
4688                        error!(
4689                            "{} Content-Length mismatch on trailers: received {} != declared {}",
4690                            log_context!(self),
4691                            *parts.data_received,
4692                            expected
4693                        );
4694                        let result = self.reset_stream(
4695                            stream_id,
4696                            global_stream_id,
4697                            context,
4698                            endpoint,
4699                            H2Error::ProtocolError,
4700                        );
4701                        self.remove_dead_stream(stream_id, global_stream_id);
4702                        return result;
4703                    }
4704                }
4705            }
4706            self.mark_end_of_stream(stream);
4707        }
4708        if let StreamState::Linked(token) = stream.state {
4709            // Mirror of handle_data_frame's rearm. LIFECYCLE invariant 15.
4710            endpoint.readiness_mut(token).arm_writable();
4711            incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_HEADERS);
4712        }
4713        // was_initial prevents trailers from triggering connection
4714        if was_initial && self.position.is_server() {
4715            incr!(names::http::REQUESTS);
4716            gauge_add!(names::http::ACTIVE_REQUESTS, 1);
4717            stream.metrics.service_start();
4718            stream.request_counted = true;
4719            stream.state = StreamState::Link;
4720            context.pending_links.push_back(global_stream_id);
4721        }
4722        MuxResult::Continue
4723    }
4724
4725    fn handle_push_promise_frame(&mut self) -> MuxResult {
4726        self.attribute_bytes_to_overhead();
4727        match self.position {
4728            Position::Client(..) => {
4729                // RFC 9113 §8.4: Server push is deprecated. Sozu never sends
4730                // SETTINGS_ENABLE_PUSH=1, so receiving PUSH_PROMISE is a protocol error.
4731                error!(
4732                    "{} Received PUSH_PROMISE but server push is not supported",
4733                    log_context!(self)
4734                );
4735                self.goaway(H2Error::ProtocolError)
4736            }
4737            Position::Server => {
4738                // Clients must never send PUSH_PROMISE (RFC 9113 §8.4)
4739                error!("{} Received PUSH_PROMISE from client", log_context!(self));
4740                self.goaway(H2Error::ProtocolError)
4741            }
4742        }
4743    }
4744
4745    fn handle_priority_frame<E, L>(
4746        &mut self,
4747        priority: parser::Priority,
4748        context: &mut Context<L>,
4749        endpoint: E,
4750    ) -> MuxResult
4751    where
4752        E: Endpoint,
4753        L: ListenerHandler + L7ListenerHandler,
4754    {
4755        if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
4756            let stream = &mut context.streams[global_stream_id];
4757            self.attribute_bytes_to_stream(&mut stream.metrics);
4758        } else {
4759            self.attribute_bytes_to_overhead();
4760        }
4761        // Pass 3 Medium #4: standalone PRIORITY frames can arrive for any
4762        // peer-chosen stream ID. Accept only currently-open streams and a
4763        // small idle look-ahead window; everything else is dropped before
4764        // it can feed memory into the priority map.
4765        if self.prioriser.push_priority_guarded(
4766            priority.stream_id,
4767            priority.inner,
4768            self.last_stream_id,
4769            &self.streams,
4770        ) {
4771            if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
4772                let result = self.reset_stream(
4773                    priority.stream_id,
4774                    global_stream_id,
4775                    context,
4776                    endpoint,
4777                    H2Error::ProtocolError,
4778                );
4779                self.remove_dead_stream(priority.stream_id, global_stream_id);
4780                return result;
4781            } else {
4782                error!(
4783                    "{} INVALID PRIORITY RECEIVED ON INVALID STREAM",
4784                    log_context!(self)
4785                );
4786                return self.goaway(H2Error::ProtocolError);
4787            }
4788        }
4789        MuxResult::Continue
4790    }
4791
4792    /// RFC 9218 §7.1: PRIORITY_UPDATE reprioritizes an open or idle-soon
4793    /// stream at the connection level. Decodes the priority field value
4794    /// (same grammar as the `priority` request header, `parse_rfc9218_priority`)
4795    /// and pushes it into the `Prioriser` through the same guarded path used
4796    /// for standalone PRIORITY frames — the guard bounds memory against a
4797    /// client spamming PRIORITY_UPDATE for far-future stream IDs.
4798    ///
4799    /// Prioritized stream ID `0` is a connection-level `PROTOCOL_ERROR`
4800    /// (RFC 9218 §7.1). For any other ID that is not currently open or
4801    /// within the idle look-ahead budget, the update is silently dropped
4802    /// (matches the PRIORITY-frame guard semantics — no state change).
4803    fn handle_priority_update_frame(&mut self, pu: parser::PriorityUpdate) -> MuxResult {
4804        self.attribute_bytes_to_overhead();
4805        if pu.prioritized_stream_id == 0 {
4806            error!(
4807                "{} PRIORITY_UPDATE with prioritized_stream_id=0 (RFC 9218 §7.1)",
4808                log_context!(self)
4809            );
4810            return self.goaway(H2Error::ProtocolError);
4811        }
4812        let (urgency, incremental) = pkawa::parse_rfc9218_priority(&pu.priority_field_value);
4813        let (prev_urgency, _) = self.prioriser.get(&pu.prioritized_stream_id);
4814        trace!(
4815            "{} PRIORITY_UPDATE stream={} urgency={}->{} incremental={} rearmed_writable=true",
4816            log_context!(self),
4817            pu.prioritized_stream_id,
4818            prev_urgency,
4819            urgency,
4820            incremental
4821        );
4822        let _ = self.prioriser.push_priority_guarded(
4823            pu.prioritized_stream_id,
4824            parser::PriorityPart::Rfc9218 {
4825                urgency,
4826                incremental,
4827            },
4828            self.last_stream_id,
4829            &self.streams,
4830        );
4831        // LIFECYCLE invariant 15: reprioritisation only changes ordering for
4832        // the NEXT write pass. Under ET epoll, if finalize_write already
4833        // stripped WRITABLE, the scheduler won't re-run without a synthetic
4834        // wake — pair the interest insert with signal_pending_write.
4835        self.readiness.arm_writable();
4836        incr!(names::h2::SIGNAL_WRITABLE_REARMED_PRIORITY_UPDATE);
4837        MuxResult::Continue
4838    }
4839
4840    fn handle_rst_stream_frame<E, L>(
4841        &mut self,
4842        rst_stream: parser::RstStream,
4843        context: &mut Context<L>,
4844        mut endpoint: E,
4845    ) -> MuxResult
4846    where
4847        E: Endpoint,
4848        L: ListenerHandler + L7ListenerHandler,
4849    {
4850        // Per-error-code counter for the inbound RST. Emitted before the
4851        // flood-detector trip check so even a connection that gets terminated
4852        // by `handle_flood_violation` shows up in the per-code breakdown
4853        // (the dedicated `h2.flood.violation.rst_stream_*` series tracks the
4854        // mitigation event itself).
4855        count!(metric_for_rst_stream_received(rst_stream.error_code), 1);
4856        // CVE-2023-44487 Rapid Reset + CVE-2019-9514: track RST_STREAM rate.
4857        self.flood_detector.rst_stream_count += 1;
4858        check_flood_or_return!(self);
4859        // Additional CVE-2023-44487 mitigation: lifetime cap on RST_STREAM
4860        // frames received. The per-window counter above half-decays, so a
4861        // patient client can keep ~50 RST/s forever; a never-decaying
4862        // lifetime counter puts an absolute ceiling on that amplification.
4863        // Streams whose backend response has not yet started count toward a
4864        // much lower "abusive" ceiling — this is the signature Rapid Reset
4865        // pattern where the attacker pays one RST frame and we pay a
4866        // backend round-trip for each.
4867        //
4868        // "Response started" here means the Server has begun producing
4869        // response bytes (backend kawa buffer past its initial phase). For
4870        // the Client position the concept does not apply symmetrically
4871        // (RSTs received from the backend are rare and benign), so we
4872        // conservatively flag them as abusive too — lifetime cap still
4873        // dominates in practice.
4874        let response_started = match self.streams.get(&rst_stream.stream_id) {
4875            Some(global_stream_id) => {
4876                let stream = &context.streams[*global_stream_id];
4877                !stream.back.is_initial()
4878            }
4879            // Stream already gone (e.g. closed, not yet registered) —
4880            // treat as response-started to avoid over-counting benign
4881            // races as abusive.
4882            None => true,
4883        };
4884        if let Some(violation) = self.flood_detector.record_rst_lifetime(response_started) {
4885            return self.handle_flood_violation(violation);
4886        }
4887        // Rapid Reset signature (CVE-2023-44487): a RST that arrives before the
4888        // backend has begun answering. Emitted alongside the per-code counter
4889        // so the SOC can alert on the rate of pre-response RSTs without
4890        // having to differentiate by error code.
4891        if !response_started {
4892            count!(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START, 1);
4893        }
4894        debug!(
4895            "{} RstStream({} -> {})",
4896            log_context!(self),
4897            rst_stream.error_code,
4898            H2Error::try_from(rst_stream.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str())
4899        );
4900        // Compute totals before removing the stream from the map,
4901        // so the removed stream's bytes are included in the total.
4902        let rst_byte_totals = self.compute_stream_byte_totals(context);
4903        if let Some(global_stream_id) = self.streams.get(&rst_stream.stream_id).copied() {
4904            let stream = &mut context.streams[global_stream_id];
4905            self.attribute_bytes_to_stream(&mut stream.metrics);
4906            let linked_token = stream.linked_token();
4907            let (client_rtt, server_rtt) =
4908                Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
4909            if let Some(token) = linked_token {
4910                endpoint.end_stream(token, global_stream_id, context);
4911            }
4912            let stream = &mut context.streams[global_stream_id];
4913            match &self.position {
4914                // Inbound RST_STREAM on the backend side terminates the in-flight
4915                // request without going through Connection::end_stream (the normal
4916                // place where Backend.active_requests is decremented), so do the
4917                // bookkeeping explicitly here to avoid leaking load counters.
4918                Position::Client(_, backend, BackendStatus::Connected) => {
4919                    let mut backend_borrow = backend.borrow_mut();
4920                    backend_borrow.active_requests =
4921                        backend_borrow.active_requests.saturating_sub(1);
4922                }
4923                Position::Client(..) => {}
4924                Position::Server => {
4925                    self.distribute_overhead(&mut stream.metrics, rst_byte_totals);
4926                    // This is a special case, normally, all stream are terminated by the server
4927                    // when the last byte of the response is written. Here, the reset is requested
4928                    // on the server endpoint and immediately terminates, shortcutting the other path
4929                    stream.metrics.backend_stop();
4930                    stream.generate_access_log(
4931                        true,
4932                        Some("H2::ResetFrame"),
4933                        context.listener.clone(),
4934                        client_rtt,
4935                        server_rtt,
4936                    );
4937                    stream.state = StreamState::Recycle;
4938                }
4939            }
4940            // Retire from streams/prioriser/stream_last_activity_at and
4941            // invalidate expect_write/expect_read if they reference this gid.
4942            self.remove_dead_stream(rst_stream.stream_id, global_stream_id);
4943        } else {
4944            self.attribute_bytes_to_overhead();
4945        }
4946        MuxResult::Continue
4947    }
4948
4949    fn handle_settings_frame<L>(
4950        &mut self,
4951        settings: parser::Settings,
4952        context: &mut Context<L>,
4953    ) -> MuxResult
4954    where
4955        L: ListenerHandler + L7ListenerHandler,
4956    {
4957        if settings.ack {
4958            // RFC 9113 §6.5: SETTINGS ACK must have empty payload
4959            if !settings.settings.is_empty() {
4960                error!("{} SETTINGS ACK with non-empty payload", log_context!(self));
4961                return self.goaway(H2Error::FrameSizeError);
4962            }
4963            // RFC 9113 §6.5: peer acknowledged our SETTINGS — clear timeout
4964            self.settings_sent_at = None;
4965            // RFC 7541 §4.2: sync the decoder's max allowed table size with
4966            // what we advertised. Currently a no-op (settings don't change at
4967            // runtime), but guards against future runtime SETTINGS updates.
4968            self.decoder.set_max_allowed_table_size(
4969                self.local_settings.settings_header_table_size as usize,
4970            );
4971            self.attribute_bytes_to_overhead();
4972            return MuxResult::Continue;
4973        }
4974        // CVE-2019-9515: track SETTINGS frame rate
4975        self.flood_detector.settings_count += 1;
4976        self.flood_detector.total_settings_received_lifetime = self
4977            .flood_detector
4978            .total_settings_received_lifetime
4979            .saturating_add(1);
4980        check_flood_or_return!(self);
4981        for setting in settings.settings {
4982            let v = setting.value;
4983            let mut is_error = false;
4984            #[rustfmt::skip]
4985            match setting.identifier {
4986                parser::SETTINGS_HEADER_TABLE_SIZE => {
4987                    // Cap to the configured maximum — a malicious peer can
4988                    // advertise up to 4 GB to inflate HPACK encoder memory.
4989                    let cap = self.flood_detector.config.max_header_table_size;
4990                    let capped = v.min(cap);
4991                    self.peer_settings.settings_header_table_size = capped;
4992                    self.encoder.set_max_table_size(capped as usize);
4993                    // RFC 7541 §4.2 / §6.3: queue a dynamic-table-size-update
4994                    // HPACK directive for the next header block we emit.
4995                    // Without it, the peer's decoder keeps its previous (possibly
4996                    // larger) table cap and our encoder-side change is silent
4997                    // — conformance suites (h2spec `hpack/4.2`) will flag it.
4998                    self.pending_table_size_update = Some(capped);
4999                },
5000                parser::SETTINGS_ENABLE_PUSH       => { self.peer_settings.settings_enable_push = v == 1;             is_error |= v > 1 },
5001                parser::SETTINGS_MAX_CONCURRENT_STREAMS => { self.peer_settings.settings_max_concurrent_streams = v },
5002                parser::SETTINGS_INITIAL_WINDOW_SIZE    => { is_error |= self.update_initial_window_size(v, context) },
5003                parser::SETTINGS_MAX_FRAME_SIZE         => { self.peer_settings.settings_max_frame_size = v;           is_error |= !(MIN_MAX_FRAME_SIZE..MAX_MAX_FRAME_SIZE).contains(&v) },
5004                parser::SETTINGS_MAX_HEADER_LIST_SIZE   => { self.peer_settings.settings_max_header_list_size = v },
5005                parser::SETTINGS_ENABLE_CONNECT_PROTOCOL => { self.peer_settings.settings_enable_connect_protocol = v == 1; is_error |= v > 1 },
5006                parser::SETTINGS_NO_RFC7540_PRIORITIES   => { self.peer_settings.settings_no_rfc7540_priorities = v == 1;   is_error |= v > 1 },
5007                other => { warn!("Unknown setting_id: {}, we MUST ignore this", other); self.flood_detector.glitch_count += 1 },
5008            };
5009            if is_error {
5010                error!("{} INVALID SETTING", log_context!(self));
5011                return self.goaway(H2Error::ProtocolError);
5012            }
5013        }
5014
5015        self.attribute_bytes_to_overhead();
5016
5017        // Enlarge the connection-level receive window for backend H2
5018        // connections (Position::Client). The server side does this in
5019        // the ServerSettings writable path, but the client needs to do
5020        // it here after receiving the server's initial SETTINGS.
5021        if self.position.is_client()
5022            && self.flow_control.window <= DEFAULT_INITIAL_WINDOW_SIZE as i32
5023        {
5024            let increment = self
5025                .connection_config
5026                .initial_connection_window
5027                .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
5028            if increment > 0 {
5029                self.queue_window_update(0, increment);
5030            }
5031            // Do NOT increment flow_control.window here: sending our own
5032            // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
5033            // Our send window is only updated by WINDOW_UPDATEs we receive
5034            // from the peer (RFC 9113 §6.9).
5035        }
5036
5037        let kawa = &mut self.zero;
5038        let ack = &serializer::SETTINGS_ACKNOWLEDGEMENT;
5039        let buf = kawa.storage.space();
5040        if buf.len() < ack.len() {
5041            error!(
5042                "{} No space in zero buffer for SETTINGS ACK ({} available, {} needed)",
5043                log_context!(self),
5044                buf.len(),
5045                ack.len()
5046            );
5047            return self.force_disconnect();
5048        }
5049        buf[..ack.len()].copy_from_slice(ack);
5050        kawa.storage.fill(ack.len());
5051
5052        self.readiness.interest.insert(Ready::WRITABLE);
5053        self.readiness.interest.remove(Ready::READABLE);
5054        self.expect_write = Some(H2StreamId::Zero);
5055        self.readiness.signal_pending_write();
5056        MuxResult::Continue
5057    }
5058
5059    fn handle_ping_frame(&mut self, ping: parser::Ping) -> MuxResult {
5060        if ping.ack {
5061            self.attribute_bytes_to_overhead();
5062            return MuxResult::Continue;
5063        }
5064        // CVE-2019-9512: track non-ACK PING frame rate
5065        self.flood_detector.ping_count += 1;
5066        self.flood_detector.total_ping_received_lifetime = self
5067            .flood_detector
5068            .total_ping_received_lifetime
5069            .saturating_add(1);
5070        check_flood_or_return!(self);
5071        self.attribute_bytes_to_overhead();
5072        let kawa = &mut self.zero;
5073        let ping_response_size = serializer::PING_ACKNOWLEDGEMENT_HEADER.len() + 8;
5074        if kawa.storage.space().len() < ping_response_size {
5075            error!(
5076                "{} No space in zero buffer for PING response ({} available, {} needed)",
5077                log_context!(self),
5078                kawa.storage.space().len(),
5079                ping_response_size
5080            );
5081            return self.force_disconnect();
5082        }
5083        match serializer::gen_ping_acknowledgement(kawa.storage.space(), &ping.payload) {
5084            Ok((_, size)) => {
5085                kawa.storage.fill(size);
5086                incr!(names::h2::FRAMES_TX_PING_ACK);
5087            }
5088            Err(error) => {
5089                error!(
5090                    "{} Could not serialize PingFrame: {:?}",
5091                    log_context!(self),
5092                    error
5093                );
5094                return self.force_disconnect();
5095            }
5096        };
5097        self.readiness.interest.insert(Ready::WRITABLE);
5098        self.readiness.interest.remove(Ready::READABLE);
5099        self.expect_write = Some(H2StreamId::Zero);
5100        self.readiness.signal_pending_write();
5101        MuxResult::Continue
5102    }
5103
5104    fn handle_goaway_frame<E, L>(
5105        &mut self,
5106        goaway: parser::GoAway,
5107        context: &mut Context<L>,
5108        mut endpoint: E,
5109    ) -> MuxResult
5110    where
5111        E: Endpoint,
5112        L: ListenerHandler + L7ListenerHandler,
5113    {
5114        self.attribute_bytes_to_overhead();
5115        let error_name =
5116            H2Error::try_from(goaway.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str());
5117        if goaway.error_code == H2Error::NoError as u32 {
5118            debug!(
5119                "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5120                log_context!(self),
5121                goaway.last_stream_id,
5122                error_name,
5123                goaway.additional_debug_data
5124            );
5125        } else {
5126            // Peer-originated failure: no variant of H2Error from a peer
5127            // implies a sozu bug. Impact handling is separate (retry above
5128            // `last_stream_id`, RST_STREAM for consumed streams) and logs
5129            // its own details below, so the summary drops to `warn!`.
5130            warn!(
5131                "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5132                log_context!(self),
5133                goaway.last_stream_id,
5134                error_name,
5135                goaway.additional_debug_data
5136            );
5137        }
5138        count!(metric_for_goaway_received(goaway.error_code), 1);
5139        // RFC 9113 §6.8: begin graceful drain.
5140        self.drain.draining = true;
5141        self.drain.peer_last_stream_id = Some(goaway.last_stream_id);
5142
5143        // Streams with ID > last_stream_id were NOT processed by the peer.
5144        // Mark them for retry (StreamState::Link) so they can be retried
5145        // on a new connection.
5146        // IMPORTANT: do NOT call endpoint.end_stream() here — that would
5147        // remove the stream from the frontend's H2 stream map and send
5148        // RST_STREAM to the client, killing the request instead of retrying it.
5149        let mut retry_streams = Vec::new();
5150        for (&stream_id, &global_stream_id) in &self.streams {
5151            if stream_id > goaway.last_stream_id {
5152                retry_streams.push((stream_id, global_stream_id));
5153            }
5154        }
5155        for (stream_id, global_stream_id) in &retry_streams {
5156            // Remove from reverse index before transitioning away from Linked.
5157            if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5158                remove_backend_stream(&mut context.backend_streams, token, *global_stream_id);
5159            }
5160            let stream = &mut context.streams[*global_stream_id];
5161            if stream.front.consumed {
5162                // Request was already sent to this backend — we can't
5163                // replay it. Use the linked token's readiness (via endpoint)
5164                // so the RST_STREAM reaches the client.
5165                debug!(
5166                    "{} GOAWAY: stream {} already consumed, cannot retry",
5167                    log_context!(self),
5168                    stream_id
5169                );
5170                if let StreamState::Linked(token) = stream.state {
5171                    let front_readiness = endpoint.readiness_mut(token);
5172                    forcefully_terminate_answer(stream, front_readiness, H2Error::RefusedStream);
5173                } else {
5174                    warn!(
5175                        "{} GOAWAY: stream {} consumed but not Linked, cannot notify frontend",
5176                        log_context!(self),
5177                        stream_id
5178                    );
5179                }
5180            } else {
5181                stream.state = StreamState::Link;
5182                context.pending_links.push_back(*global_stream_id);
5183            }
5184            // Both retry (!consumed) and terminated (consumed) paths remove the
5185            // stream from self.streams without going through Connection::end_stream,
5186            // so decrement Backend.active_requests here to keep load metrics honest.
5187            if let Position::Client(_, backend, BackendStatus::Connected) = &self.position {
5188                let mut backend_borrow = backend.borrow_mut();
5189                backend_borrow.active_requests = backend_borrow.active_requests.saturating_sub(1);
5190            }
5191            // Retire from streams/prioriser/stream_last_activity_at and
5192            // invalidate expect_write/expect_read if they reference this gid.
5193            self.remove_dead_stream(*stream_id, *global_stream_id);
5194        }
5195
5196        // If no active streams remain, close immediately
5197        if self.streams.is_empty() {
5198            return self.goaway(H2Error::NoError);
5199        }
5200
5201        // Otherwise, let remaining streams (ID <= last_stream_id) complete.
5202        // The connection will be closed when all streams finish.
5203        MuxResult::Continue
5204    }
5205
5206    fn handle_window_update_frame<E, L>(
5207        &mut self,
5208        wu: WindowUpdate,
5209        context: &mut Context<L>,
5210        endpoint: E,
5211    ) -> MuxResult
5212    where
5213        E: Endpoint,
5214        L: ListenerHandler + L7ListenerHandler,
5215    {
5216        let stream_id = wu.stream_id;
5217        let increment = wu.increment;
5218
5219        // RFC 9113 §6.9: increment of 0 MUST be treated as an error.
5220        // Connection-level (stream 0) -> connection error (GOAWAY).
5221        // Stream-level -> stream error (RST_STREAM).
5222        if increment == 0 {
5223            if stream_id == 0 {
5224                error!(
5225                    "{} WINDOW_UPDATE with zero increment on connection (stream 0)",
5226                    log_context!(self)
5227                );
5228                return self.goaway(H2Error::ProtocolError);
5229            } else {
5230                error!(
5231                    "{} WINDOW_UPDATE with zero increment on stream {}",
5232                    log_context!(self),
5233                    stream_id
5234                );
5235                if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5236                    let result = self.reset_stream(
5237                        stream_id,
5238                        global_stream_id,
5239                        context,
5240                        endpoint,
5241                        H2Error::ProtocolError,
5242                    );
5243                    self.remove_dead_stream(stream_id, global_stream_id);
5244                    return result;
5245                }
5246                // Stream not in map (already closed) — treat as glitch
5247                self.flood_detector.glitch_count += 1;
5248                check_flood_or_return!(self);
5249                self.attribute_bytes_to_overhead();
5250                return MuxResult::Continue;
5251            }
5252        }
5253
5254        // The parser masks the reserved bit (STREAM_ID_MASK), so increment <=
5255        // 2^31-1 and try_from always succeeds. Use try_from rather than `as` to
5256        // guard against a future parser change that drops the mask.
5257        let increment = i32::try_from(increment).unwrap_or(i32::MAX);
5258        if stream_id == 0 {
5259            // Count connection-level WINDOW_UPDATEs before touching the window
5260            // so a per-window flood stops us before we pay the arithmetic cost
5261            // on a million-frame burst. Zero-increment frames short-circuited
5262            // above, so every increment here is a legal-looking rate consumer.
5263            self.flood_detector.window_update_stream0_count = self
5264                .flood_detector
5265                .window_update_stream0_count
5266                .saturating_add(1);
5267            check_flood_or_return!(self);
5268            self.attribute_bytes_to_overhead();
5269            if let Some(window) = self.flow_control.window.checked_add(increment) {
5270                if self.flow_control.window <= 0 && window > 0 {
5271                    self.readiness.arm_writable();
5272                }
5273                self.flow_control.window = window;
5274                debug!(
5275                    "{} WINDOW_UPDATE received: stream=0 increment={} new_connection_window={}",
5276                    log_context!(self),
5277                    increment,
5278                    self.flow_control.window
5279                );
5280            } else {
5281                error!("{} INVALID WINDOW INCREMENT", log_context!(self));
5282                return self.goaway(H2Error::FlowControlError);
5283            }
5284        } else if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5285            let stream = &mut context.streams[global_stream_id];
5286            self.attribute_bytes_to_stream(&mut stream.metrics);
5287            if let Some(window) = stream.window.checked_add(increment) {
5288                if stream.window <= 0 && window > 0 {
5289                    self.readiness.arm_writable();
5290                }
5291                stream.window = window;
5292                debug!(
5293                    "{} WINDOW_UPDATE received: stream={} increment={} new_stream_window={}",
5294                    log_context!(self),
5295                    stream_id,
5296                    increment,
5297                    stream.window
5298                );
5299            } else {
5300                let result = self.reset_stream(
5301                    stream_id,
5302                    global_stream_id,
5303                    context,
5304                    endpoint,
5305                    H2Error::FlowControlError,
5306                );
5307                self.remove_dead_stream(stream_id, global_stream_id);
5308                return result;
5309            }
5310        } else {
5311            self.attribute_bytes_to_overhead();
5312            trace!(
5313                "{} Ignoring window update on closed stream {}: {}",
5314                log_context!(self),
5315                stream_id,
5316                increment
5317            );
5318            // Pass 3 Low #5: WINDOW_UPDATE on a closed stream is legal
5319            // (RFC 9113 §6.9.1) but has no useful effect, so a peer that
5320            // keeps sending them is wasting our cycles. Count it as a
5321            // glitch so a flood contributes to `check_flood()` and can
5322            // eventually trigger ENHANCE_YOUR_CALM.
5323            self.flood_detector.glitch_count += 1;
5324            check_flood_or_return!(self);
5325        }
5326        MuxResult::Continue
5327    }
5328
5329    fn update_initial_window_size<L>(&mut self, value: u32, context: &mut Context<L>) -> bool
5330    where
5331        L: ListenerHandler + L7ListenerHandler,
5332    {
5333        if value > FLOW_CONTROL_MAX_WINDOW {
5334            return true;
5335        }
5336        let delta = match i32::try_from(
5337            value as i64 - self.peer_settings.settings_initial_window_size as i64,
5338        ) {
5339            Ok(d) => d,
5340            Err(_) => {
5341                error!("{} initial window size delta overflow", log_context!(self));
5342                return true;
5343            }
5344        };
5345        let mut open_window = false;
5346        // Only update windows for streams owned by this connection
5347        for &global_stream_id in self.streams.values() {
5348            let stream = &mut context.streams[global_stream_id];
5349            // RFC 9113 §6.9.2: changes to SETTINGS_INITIAL_WINDOW_SIZE can cause
5350            // stream windows to exceed 2^31-1, which is a flow control error.
5351            match stream.window.checked_add(delta) {
5352                Some(new_window) => {
5353                    open_window |= stream.window <= 0 && new_window > 0;
5354                    stream.window = new_window;
5355                }
5356                None => return true,
5357            }
5358        }
5359        trace!(
5360            "{} UPDATE INIT WINDOW: {} {} {:?}",
5361            log_context!(self),
5362            delta,
5363            open_window,
5364            self.readiness
5365        );
5366        if open_window {
5367            self.readiness.arm_writable();
5368        }
5369        self.peer_settings.settings_initial_window_size = value;
5370        false
5371    }
5372
5373    pub fn force_disconnect(&mut self) -> MuxResult {
5374        self.state = H2State::Error;
5375        match &mut self.position {
5376            Position::Client(_, _, status) => {
5377                *status = BackendStatus::Disconnecting;
5378                self.readiness.event = Ready::HUP;
5379                debug!(
5380                    "{} H2 force_disconnect client: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
5381                    log_context!(self),
5382                    self.state,
5383                    self.streams.len(),
5384                    self.expect_write,
5385                    self.socket.socket_wants_write(),
5386                    self.readiness
5387                );
5388                MuxResult::Continue
5389            }
5390            Position::Server => {
5391                if self.peer_gone_after_final_goaway() {
5392                    return MuxResult::CloseSession;
5393                }
5394                // Don't disconnect immediately if rustls still has buffered TLS
5395                // records. Returning CloseSession here triggers shutdown(Write)
5396                // which sends FIN — but any TLS records still in rustls's buffer
5397                // (not yet flushed to the TCP send buffer) are lost, causing the
5398                // client to see "TLS decode error / unexpected eof".
5399                // Instead, keep WRITABLE interest and let the writable path flush.
5400                if self.socket.socket_wants_write() {
5401                    debug!(
5402                        "{} H2 force_disconnect delaying close: state={:?}, streams={}, expect_write={:?}, wants_write=true, readiness={:?}",
5403                        log_context!(self),
5404                        self.state,
5405                        self.streams.len(),
5406                        self.expect_write,
5407                        self.readiness
5408                    );
5409                    self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
5410                    self.ensure_tls_flushed();
5411                    MuxResult::Continue
5412                } else {
5413                    debug!(
5414                        "{} H2 force_disconnect closing session: state={:?}, streams={}, expect_write={:?}, wants_write=false, readiness={:?}",
5415                        log_context!(self),
5416                        self.state,
5417                        self.streams.len(),
5418                        self.expect_write,
5419                        self.readiness
5420                    );
5421                    MuxResult::CloseSession
5422                }
5423            }
5424        }
5425    }
5426
5427    pub fn close<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E)
5428    where
5429        E: Endpoint,
5430        L: ListenerHandler + L7ListenerHandler,
5431    {
5432        match self.position {
5433            Position::Client(_, _, BackendStatus::KeepAlive) => {
5434                error!(
5435                    "{} H2 connections do not use KeepAlive backend status",
5436                    log_context!(self)
5437                );
5438                return;
5439            }
5440            Position::Client(..) => {}
5441            Position::Server => {
5442                let tls_pending_before = self.socket.socket_wants_write();
5443                if !self.streams.is_empty() || tls_pending_before || self.expect_write.is_some() {
5444                    debug!(
5445                        "{} H2 close with active state: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
5446                        log_context!(self),
5447                        self.state,
5448                        self.streams.len(),
5449                        self.expect_write,
5450                        tls_pending_before,
5451                        self.readiness
5452                    );
5453                    for (stream_id, global_stream_id) in &self.streams {
5454                        let stream = &context.streams[*global_stream_id];
5455                        debug!(
5456                            "{}   close stream id={} gid={}: state={:?}, front_eos={}, back_eos={}, front_phase={:?}, back_phase={:?}, front_completed={}, back_completed={}",
5457                            log_context!(self),
5458                            stream_id,
5459                            global_stream_id,
5460                            stream.state,
5461                            stream.front_received_end_of_stream,
5462                            stream.back_received_end_of_stream,
5463                            stream.front.parsing_phase,
5464                            stream.back.parsing_phase,
5465                            stream.front.is_completed(),
5466                            stream.back.is_completed()
5467                        );
5468                    }
5469                }
5470                if !self.close_notify_sent {
5471                    trace!("{} H2 SENDING CLOSE NOTIFY", log_context!(self));
5472                }
5473                let (tls_pending_after, drain_rounds) =
5474                    drain_tls_close_notify(&mut self.socket, &mut self.close_notify_sent);
5475                if tls_pending_after {
5476                    // Severity tiering: key on stream-count + close-state, not
5477                    // peer-vs-operator. Composes with the send-side `H2Error`
5478                    // variant tier in `goaway()` — both rules demote benign
5479                    // paths and keep loss-bearing paths loud.
5480                    //
5481                    // - `streams != 0`           -> `error!`: live streams at
5482                    //   close time, response-byte loss is possible.
5483                    // - `streams == 0` AND state in {GoAway, Error}
5484                    //                             -> `warn!`: idle close after
5485                    //   a GOAWAY exchange (peer-initiated abort or our own
5486                    //   graceful drain). What's stranded is best-effort
5487                    //   GOAWAY/close_notify; no application data was queued.
5488                    // - `streams == 0` from any other state
5489                    //                             -> `error!`: unexpected
5490                    //   teardown path (no GOAWAY exchange) — keep loud so
5491                    //   unknown failure modes surface.
5492                    if !self.streams.is_empty() {
5493                        error!(
5494                            "{} TLS buffer NOT fully drained on close: \
5495                             pending_before={}, pending_after={}, drain_rounds={}, \
5496                             state={:?}, streams={}, expect_write={:?}, \
5497                             close_notify_sent={}, readiness={:?}",
5498                            log_context!(self),
5499                            tls_pending_before,
5500                            tls_pending_after,
5501                            drain_rounds,
5502                            self.state,
5503                            self.streams.len(),
5504                            self.expect_write,
5505                            self.close_notify_sent,
5506                            self.readiness
5507                        );
5508                    } else if matches!(self.state, H2State::GoAway | H2State::Error) {
5509                        warn!(
5510                            "{} TLS buffer NOT fully drained on close: \
5511                             pending_before={}, pending_after={}, drain_rounds={}, \
5512                             state={:?}, streams={}, expect_write={:?}, \
5513                             close_notify_sent={}, readiness={:?}",
5514                            log_context!(self),
5515                            tls_pending_before,
5516                            tls_pending_after,
5517                            drain_rounds,
5518                            self.state,
5519                            self.streams.len(),
5520                            self.expect_write,
5521                            self.close_notify_sent,
5522                            self.readiness
5523                        );
5524                    } else {
5525                        error!(
5526                            "{} TLS buffer NOT fully drained on close: \
5527                             pending_before={}, pending_after={}, drain_rounds={}, \
5528                             state={:?}, streams={}, expect_write={:?}, \
5529                             close_notify_sent={}, readiness={:?}",
5530                            log_context!(self),
5531                            tls_pending_before,
5532                            tls_pending_after,
5533                            drain_rounds,
5534                            self.state,
5535                            self.streams.len(),
5536                            self.expect_write,
5537                            self.close_notify_sent,
5538                            self.readiness
5539                        );
5540                    }
5541                }
5542                return;
5543            }
5544        }
5545        // reconnection is handled by the server for each stream separately
5546        for global_stream_id in self.streams.values() {
5547            trace!("{} end stream: {}", log_context!(self), global_stream_id);
5548            if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5549                endpoint.end_stream(token, *global_stream_id, context);
5550            }
5551        }
5552    }
5553
5554    /// Reset a stream: tear down kawa state, emit `RST_STREAM` on the wire,
5555    /// and record MadeYouReset accounting.
5556    ///
5557    /// `wire_stream_id` is the on-wire `StreamId`; `stream_id` is the internal
5558    /// `GlobalStreamId` slot. Callers already carry both so we pass them
5559    /// explicitly rather than scanning `self.streams`. The wire id is threaded
5560    /// into [`Self::enqueue_rst`] which queues the frame for serialisation in
5561    /// [`Self::flush_pending_control_frames`] on the next writable tick —
5562    /// independent of whether the caller immediately evicts the slot via
5563    /// `remove_dead_stream` (which they usually do). This is what guarantees
5564    /// the RST reaches the peer for malformed HEADERS / flow-control /
5565    /// content-length violations flagged by h2spec 2.0.
5566    pub fn reset_stream<E, L>(
5567        &mut self,
5568        wire_stream_id: StreamId,
5569        stream_id: GlobalStreamId,
5570        context: &mut Context<L>,
5571        mut endpoint: E,
5572        error: H2Error,
5573    ) -> MuxResult
5574    where
5575        E: Endpoint,
5576        L: ListenerHandler + L7ListenerHandler,
5577    {
5578        // Compute totals before taking mutable borrows on the target stream.
5579        let reset_byte_totals = self.compute_stream_byte_totals(context);
5580        context.unlink_stream(stream_id);
5581        let stream = &mut context.streams[stream_id];
5582        trace!(
5583            "{} reset H2 stream {}: {:#?}",
5584            log_context!(self),
5585            stream_id,
5586            stream.context
5587        );
5588        let old_state = std::mem::replace(&mut stream.state, StreamState::Unlinked);
5589        forcefully_terminate_answer(stream, &mut self.readiness, error);
5590        let linked_token = if let StreamState::Linked(token) = old_state {
5591            Some(token)
5592        } else {
5593            None
5594        };
5595        let (client_rtt, server_rtt) =
5596            Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
5597        if let Some(token) = linked_token {
5598            endpoint.end_stream(token, stream_id, context);
5599        }
5600        // Emit access log for server-side resets on streams that had active requests
5601        if self.position.is_server()
5602            && matches!(old_state, StreamState::Link | StreamState::Linked(_))
5603        {
5604            let stream = &mut context.streams[stream_id];
5605            self.distribute_overhead(&mut stream.metrics, reset_byte_totals);
5606            stream.metrics.backend_stop();
5607            stream.generate_access_log(
5608                true,
5609                Some("H2::Reset"),
5610                context.listener.clone(),
5611                client_rtt,
5612                server_rtt,
5613            );
5614            stream.metrics.reset();
5615        }
5616        // Queue the RST for wire emission. Independent of the owning stream
5617        // remaining in `self.streams` — callers typically follow this with
5618        // `remove_dead_stream`, which would otherwise evict the slot before
5619        // `write_streams` could run `kawa.prepare` against the converter.
5620        //
5621        // `enqueue_rst` performs every accounting side-effect at queue
5622        // time (per-error counter, global tx counter, CVE-2025-8671
5623        // MadeYouReset lifetime cap). Graceful `NoError` cancels —
5624        // stream recycle, propagated client-side cancel — are exempt
5625        // from the lifetime cap inside the accounting helper itself.
5626        if let Some(result) = self.enqueue_rst(wire_stream_id, error) {
5627            return result;
5628        }
5629        MuxResult::Continue
5630    }
5631
5632    pub fn end_stream<L>(&mut self, stream_gid: GlobalStreamId, context: &mut Context<L>)
5633    where
5634        L: ListenerHandler + L7ListenerHandler,
5635    {
5636        context.unlink_stream(stream_gid);
5637        let stream_context = context.http_context(stream_gid);
5638        trace!(
5639            "{} end H2 stream {}: {:#?}",
5640            log_context!(self),
5641            stream_gid,
5642            stream_context
5643        );
5644        match self.position {
5645            Position::Client(..) => {
5646                // Resolve the wire StreamId for this gid up front so the
5647                // subsequent cleanup does not hold an iterator borrow on
5648                // `self.streams` while also mutating it.
5649                let wire_stream_id = self
5650                    .streams
5651                    .iter()
5652                    .find_map(|(&sid, &gid)| (gid == stream_gid).then_some(sid));
5653                if let Some(id) = wire_stream_id {
5654                    // Only send RST_STREAM if the stream hasn't fully completed.
5655                    // If both request and response are terminated, the stream is
5656                    // already in "closed" state (RFC 9113 §5.1) — sending RST_STREAM
5657                    // on a closed stream would be a protocol error that could cause
5658                    // the H2 peer to close the entire connection.
5659                    let stream = &context.streams[stream_gid];
5660                    let fully_completed =
5661                        stream.back_received_end_of_stream && stream.front.is_terminated();
5662                    if !fully_completed && !self.rst_sent.contains(&id) {
5663                        let kawa = &mut self.zero;
5664                        let mut frame = [0; 13];
5665                        if let Ok((_, _size)) =
5666                            serializer::gen_rst_stream(&mut frame, id, H2Error::Cancel)
5667                        {
5668                            let buf = kawa.storage.space();
5669                            if buf.len() >= frame.len() {
5670                                buf[..frame.len()].copy_from_slice(&frame);
5671                                kawa.storage.fill(frame.len());
5672                                incr!(names::h2::FRAMES_TX_RST_STREAM);
5673                                count!(metric_for_rst_stream_sent(H2Error::Cancel), 1);
5674                                self.readiness.arm_writable();
5675                                self.rst_sent.insert(id);
5676                            }
5677                        }
5678                    }
5679                    // Retire the stream and invalidate expect_write/expect_read
5680                    // if they still reference this gid — the slot may be popped
5681                    // by `shrink_trailing_recycle` on the next create_stream.
5682                    self.remove_dead_stream(id, stream_gid);
5683                    if context.streams[stream_gid].state != StreamState::Recycle {
5684                        context.streams[stream_gid].state = StreamState::Unlinked;
5685                    }
5686                    return;
5687                }
5688                error!(
5689                    "{} end_stream called for unknown global_stream_id {}",
5690                    log_context!(self),
5691                    stream_gid
5692                );
5693            }
5694            Position::Server => {
5695                let answers_rc = context.listener.borrow().get_answers().clone();
5696                let stream = &mut context.streams[stream_gid];
5697                match end_stream_decision(stream) {
5698                    EndStreamAction::ForwardTerminated => {
5699                        #[cfg(debug_assertions)]
5700                        context
5701                            .debug
5702                            .push(DebugEvent::Str(format!("Close terminated {stream_gid}")));
5703                        debug!(
5704                            "{} CLOSING H2 TERMINATED STREAM {} {:?}",
5705                            log_context!(self),
5706                            stream_gid,
5707                            stream
5708                        );
5709                        stream.state = StreamState::Unlinked;
5710                        self.readiness.arm_writable();
5711                        context.debug.set_interesting(true);
5712                    }
5713                    EndStreamAction::CloseDelimited => {
5714                        debug!(
5715                            "{} CLOSE DELIMITED H2 STREAM {} {:?}",
5716                            log_context!(self),
5717                            stream_gid,
5718                            stream
5719                        );
5720                        stream.back.push_block(kawa::Block::Flags(kawa::Flags {
5721                            end_body: true,
5722                            end_chunk: false,
5723                            end_header: false,
5724                            end_stream: true,
5725                        }));
5726                        stream.back.parsing_phase = kawa::ParsingPhase::Terminated;
5727                        stream.state = StreamState::Unlinked;
5728                        self.readiness.arm_writable();
5729                        context.debug.set_interesting(true);
5730                    }
5731                    EndStreamAction::ForwardUnterminated => {
5732                        #[cfg(debug_assertions)]
5733                        context
5734                            .debug
5735                            .push(DebugEvent::Str(format!("Close unterminated {stream_gid}")));
5736                        debug!(
5737                            "{} CLOSING H2 UNTERMINATED STREAM {} {:?}",
5738                            log_context!(self),
5739                            stream_gid,
5740                            stream
5741                        );
5742                        forcefully_terminate_answer(
5743                            stream,
5744                            &mut self.readiness,
5745                            H2Error::InternalError,
5746                        );
5747                        context.debug.set_interesting(true);
5748                    }
5749                    EndStreamAction::SendDefault(status) => {
5750                        #[cfg(debug_assertions)]
5751                        context.debug.push(DebugEvent::Str(format!(
5752                            "Can't retry, send {status} on {stream_gid}"
5753                        )));
5754                        let answers = answers_rc.borrow();
5755                        set_default_answer(stream, &mut self.readiness, status, &answers);
5756                    }
5757                    EndStreamAction::Reconnect => {
5758                        debug!("{} H2 RECONNECT", log_context!(self));
5759                        #[cfg(debug_assertions)]
5760                        context
5761                            .debug
5762                            .push(DebugEvent::Str(format!("Retry {stream_gid}")));
5763                        stream.state = StreamState::Link;
5764                        context.pending_links.push_back(stream_gid);
5765                    }
5766                }
5767            }
5768        }
5769    }
5770
5771    pub fn start_stream<L>(&mut self, stream: GlobalStreamId, _context: &mut Context<L>) -> bool
5772    where
5773        L: ListenerHandler + L7ListenerHandler,
5774    {
5775        // RFC 9113 §6.8: reject new streams on a draining connection
5776        if self.drain.draining {
5777            error!(
5778                "{} Cannot open new stream on draining connection (stream {})",
5779                log_context!(self),
5780                stream
5781            );
5782            return false;
5783        }
5784        // RFC 9113 §5.1.2: respect peer's max concurrent streams limit
5785        if self.streams.len() >= self.peer_settings.settings_max_concurrent_streams as usize {
5786            error!(
5787                "{} Cannot open new stream: active={} >= peer max_concurrent_streams={}",
5788                log_context!(self),
5789                self.streams.len(),
5790                self.peer_settings.settings_max_concurrent_streams
5791            );
5792            return false;
5793        }
5794        trace!(
5795            "{} start new H2 stream {} {:?}",
5796            log_context!(self),
5797            stream,
5798            self.readiness
5799        );
5800        let Some(stream_id) = self.new_stream_id() else {
5801            // Pass 4 Medium #5: the client-initiated stream-ID space
5802            // (31 bits, odd only) is exhausted. The backend is now useless
5803            // for new requests — gracefully drain it. Without this
5804            // transition, the Connection lingers in `Connected` state and
5805            // every subsequent request returns 503 because `start_stream`
5806            // keeps returning false.
5807            //
5808            // The session envelope is hoisted to a local because the
5809            // `match &mut self.position` below holds a mutable borrow on
5810            // `self.position`, and `log_context!(self)` reads that field
5811            // for its `position={...}` slot — calling the macro inside the
5812            // match arms would conflict with the active borrow. The
5813            // bidirectional regression guard in `lib/tests/log_layout.rs`
5814            // (and the matching scanner in `lib/build.rs`) recognises this
5815            // shape by scanning backward as well as forward from each log
5816            // call.
5817            let context = log_context!(self);
5818            match &mut self.position {
5819                Position::Client(cluster_id, backend, status) => {
5820                    let backend_addr = backend.borrow().address;
5821                    let cluster = cluster_id.clone();
5822                    info!(
5823                        "{} H2 backend stream IDs exhausted (cluster={}, backend={:?}) — draining",
5824                        context, cluster, backend_addr
5825                    );
5826                    *status = BackendStatus::Disconnecting;
5827                }
5828                Position::Server => {
5829                    error!(
5830                        "{} H2 server stream IDs exhausted — sending graceful GOAWAY",
5831                        context
5832                    );
5833                }
5834            }
5835            self.graceful_goaway();
5836            return false;
5837        };
5838        self.streams.insert(stream_id, stream);
5839        self.stream_last_activity_at
5840            .insert(stream_id, Instant::now());
5841        self.readiness.arm_writable();
5842        true
5843    }
5844}
5845
5846#[cfg(test)]
5847mod tests {
5848    use std::{cell::RefCell, rc::Rc};
5849
5850    use super::*;
5851    use crate::{pool::Pool, protocol::kawa_h1::editor::HttpContext};
5852
5853    // ── H2FloodDetector ──────────────────────────────────────────────────
5854
5855    #[test]
5856    fn test_flood_detector_no_flood_below_threshold() {
5857        let config = H2FloodConfig::default();
5858        let mut detector = H2FloodDetector::new(config);
5859
5860        // All counters at zero -> no flood
5861        assert!(detector.check_flood().is_none());
5862
5863        // Increment each counter to exactly the threshold (not exceeding)
5864        detector.rst_stream_count = config.max_rst_stream_per_window;
5865        detector.ping_count = config.max_ping_per_window;
5866        detector.settings_count = config.max_settings_per_window;
5867        detector.empty_data_count = config.max_empty_data_per_window;
5868        detector.continuation_count = config.max_continuation_frames;
5869        detector.glitch_count = config.max_glitch_count;
5870        // At threshold but not exceeding -> no flood
5871        assert!(detector.check_flood().is_none());
5872    }
5873
5874    #[test]
5875    fn test_flood_detector_detects_rapid_reset() {
5876        let config = H2FloodConfig::default();
5877        let mut detector = H2FloodDetector::new(config);
5878
5879        detector.rst_stream_count = config.max_rst_stream_per_window + 1;
5880        assert!(matches!(
5881            detector.check_flood(),
5882            Some(H2FloodViolation {
5883                error: H2Error::EnhanceYourCalm,
5884                ..
5885            })
5886        ));
5887    }
5888
5889    #[test]
5890    fn test_flood_detector_detects_ping_flood() {
5891        let config = H2FloodConfig::default();
5892        let mut detector = H2FloodDetector::new(config);
5893
5894        detector.ping_count = config.max_ping_per_window + 1;
5895        assert!(matches!(
5896            detector.check_flood(),
5897            Some(H2FloodViolation {
5898                error: H2Error::EnhanceYourCalm,
5899                ..
5900            })
5901        ));
5902    }
5903
5904    #[test]
5905    fn test_flood_detector_detects_settings_flood() {
5906        let config = H2FloodConfig::default();
5907        let mut detector = H2FloodDetector::new(config);
5908
5909        detector.settings_count = config.max_settings_per_window + 1;
5910        assert!(matches!(
5911            detector.check_flood(),
5912            Some(H2FloodViolation {
5913                error: H2Error::EnhanceYourCalm,
5914                ..
5915            })
5916        ));
5917    }
5918
5919    #[test]
5920    fn test_flood_detector_detects_empty_data_flood() {
5921        let config = H2FloodConfig::default();
5922        let mut detector = H2FloodDetector::new(config);
5923
5924        detector.empty_data_count = config.max_empty_data_per_window + 1;
5925        assert!(matches!(
5926            detector.check_flood(),
5927            Some(H2FloodViolation {
5928                error: H2Error::EnhanceYourCalm,
5929                ..
5930            })
5931        ));
5932    }
5933
5934    #[test]
5935    fn test_flood_detector_detects_continuation_flood() {
5936        let config = H2FloodConfig::default();
5937        let mut detector = H2FloodDetector::new(config);
5938
5939        detector.continuation_count = config.max_continuation_frames + 1;
5940        assert!(matches!(
5941            detector.check_flood(),
5942            Some(H2FloodViolation {
5943                error: H2Error::EnhanceYourCalm,
5944                ..
5945            })
5946        ));
5947    }
5948
5949    #[test]
5950    fn test_flood_detector_detects_header_size_flood() {
5951        let config = H2FloodConfig::default();
5952        let mut detector = H2FloodDetector::new(config);
5953
5954        detector.accumulated_header_size = MAX_HEADER_LIST_SIZE as u32 + 1;
5955        assert!(matches!(
5956            detector.check_flood(),
5957            Some(H2FloodViolation {
5958                error: H2Error::EnhanceYourCalm,
5959                ..
5960            })
5961        ));
5962    }
5963
5964    #[test]
5965    fn test_flood_detector_detects_glitch_flood() {
5966        let config = H2FloodConfig::default();
5967        let mut detector = H2FloodDetector::new(config);
5968
5969        detector.glitch_count = config.max_glitch_count + 1;
5970        assert!(matches!(
5971            detector.check_flood(),
5972            Some(H2FloodViolation {
5973                error: H2Error::EnhanceYourCalm,
5974                ..
5975            })
5976        ));
5977    }
5978
5979    #[test]
5980    fn test_flood_detector_custom_thresholds() {
5981        let config = H2FloodConfig {
5982            max_rst_stream_per_window: 5,
5983            max_ping_per_window: 10,
5984            max_settings_per_window: 3,
5985            max_empty_data_per_window: 8,
5986            max_continuation_frames: 2,
5987            max_glitch_count: 15,
5988            ..H2FloodConfig::default()
5989        };
5990        let mut detector = H2FloodDetector::new(config);
5991
5992        // Below custom threshold -> no flood
5993        detector.rst_stream_count = 5;
5994        assert!(detector.check_flood().is_none());
5995
5996        // Above custom threshold -> flood
5997        detector.rst_stream_count = 6;
5998        assert!(matches!(
5999            detector.check_flood(),
6000            Some(H2FloodViolation {
6001                error: H2Error::EnhanceYourCalm,
6002                ..
6003            })
6004        ));
6005    }
6006
6007    #[test]
6008    fn test_flood_detector_reset_continuation() {
6009        let config = H2FloodConfig::default();
6010        let mut detector = H2FloodDetector::new(config);
6011
6012        detector.continuation_count = 15;
6013        detector.accumulated_header_size = 30000;
6014
6015        detector.reset_continuation();
6016
6017        assert_eq!(detector.continuation_count, 0);
6018        assert_eq!(detector.accumulated_header_size, 0);
6019    }
6020
6021    #[test]
6022    fn test_flood_detector_half_decay_on_window_expiry() {
6023        let config = H2FloodConfig::default();
6024        let mut detector = H2FloodDetector::new(config);
6025
6026        detector.rst_stream_count = 80;
6027        detector.ping_count = 60;
6028        detector.settings_count = 40;
6029        detector.empty_data_count = 20;
6030        detector.window_update_stream0_count = 90;
6031        detector.glitch_count = 50;
6032
6033        // Force window expiry by setting window_start to the past
6034        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6035
6036        // check_flood calls maybe_reset_window which halves counters
6037        let _ = detector.check_flood();
6038
6039        assert_eq!(detector.rst_stream_count, 40);
6040        assert_eq!(detector.ping_count, 30);
6041        assert_eq!(detector.settings_count, 20);
6042        assert_eq!(detector.empty_data_count, 10);
6043        assert_eq!(detector.window_update_stream0_count, 45);
6044        assert_eq!(detector.glitch_count, 25);
6045    }
6046
6047    #[test]
6048    fn test_flood_detector_window_update_stream0_trips_at_threshold() {
6049        let config = H2FloodConfig {
6050            max_window_update_stream0_per_window: 5,
6051            ..H2FloodConfig::default()
6052        };
6053        let mut detector = H2FloodDetector::new(config);
6054
6055        // At threshold — no flood yet (strict greater-than, matches existing counters).
6056        detector.window_update_stream0_count = 5;
6057        assert!(detector.check_flood().is_none());
6058
6059        // Above threshold — flood with the correct violation reason + metric key.
6060        detector.window_update_stream0_count = 6;
6061        let violation = detector
6062            .check_flood()
6063            .expect("WINDOW_UPDATE stream-0 flood must trip above threshold");
6064        assert_eq!(violation.error, H2Error::EnhanceYourCalm);
6065        assert_eq!(violation.reason, "WINDOW_UPDATE stream 0");
6066        assert_eq!(
6067            violation.metric_key,
6068            "h2.flood.violation.window_update_stream0_window"
6069        );
6070        assert_eq!(violation.count, 6);
6071        assert_eq!(violation.threshold, 5);
6072    }
6073
6074    #[test]
6075    fn test_flood_detector_window_update_stream0_honours_default() {
6076        // Default threshold must match the documented constant so operators
6077        // can reason about behaviour without reading code.
6078        let detector = H2FloodDetector::default();
6079        assert_eq!(
6080            detector.config.max_window_update_stream0_per_window,
6081            DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW
6082        );
6083        assert_eq!(detector.window_update_stream0_count, 0);
6084    }
6085
6086    #[test]
6087    fn test_flood_detector_decay_prevents_flood() {
6088        let config = H2FloodConfig {
6089            max_rst_stream_per_window: 10,
6090            ..H2FloodConfig::default()
6091        };
6092        let mut detector = H2FloodDetector::new(config);
6093
6094        // Set counter just above threshold
6095        detector.rst_stream_count = 12;
6096
6097        // Without decay -> flood
6098        assert!(matches!(
6099            detector.check_flood(),
6100            Some(H2FloodViolation {
6101                error: H2Error::EnhanceYourCalm,
6102                ..
6103            })
6104        ));
6105
6106        // Reset and simulate window expiry
6107        detector.rst_stream_count = 12;
6108        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6109
6110        // After decay: 12/2 = 6, which is below threshold 10 -> no flood
6111        assert!(detector.check_flood().is_none());
6112    }
6113
6114    #[test]
6115    fn test_flood_detector_lifetime_rst_cap_triggers_enhance_your_calm() {
6116        // CVE-2023-44487 Rapid Reset: a patient attacker that stays under
6117        // the half-decaying per-window threshold must still be stopped by
6118        // the lifetime cap. Simulate a response-started RST (no abusive
6119        // counter bump) so only the lifetime ceiling is tested.
6120        let mut detector = H2FloodDetector::default();
6121        for _ in 0..DEFAULT_MAX_RST_STREAM_LIFETIME {
6122            assert!(detector.record_rst_lifetime(true).is_none());
6123        }
6124        assert_eq!(
6125            detector.total_rst_received_lifetime,
6126            DEFAULT_MAX_RST_STREAM_LIFETIME
6127        );
6128        assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
6129        // Next RST crosses the ceiling.
6130        assert!(matches!(
6131            detector.record_rst_lifetime(true),
6132            Some(H2FloodViolation {
6133                error: H2Error::EnhanceYourCalm,
6134                ..
6135            })
6136        ));
6137    }
6138
6139    #[test]
6140    fn test_flood_detector_abusive_rst_cap_triggers_first() {
6141        // Pre-response-start RSTs have a much lower ceiling; they trip
6142        // well before the generic lifetime cap.
6143        let mut detector = H2FloodDetector::default();
6144        for _ in 0..DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME {
6145            assert!(detector.record_rst_lifetime(false).is_none());
6146        }
6147        assert_eq!(
6148            detector.total_abusive_rst_received_lifetime,
6149            DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME
6150        );
6151        assert!(matches!(
6152            detector.record_rst_lifetime(false),
6153            Some(H2FloodViolation {
6154                error: H2Error::EnhanceYourCalm,
6155                ..
6156            })
6157        ));
6158    }
6159
6160    #[test]
6161    fn test_flood_detector_emitted_rst_below_threshold_is_clean() {
6162        // Server may legitimately RST some streams (protocol errors,
6163        // client-side abuse caught by other mitigations). Staying at the
6164        // threshold must not trip the ceiling.
6165        let mut detector = H2FloodDetector::default();
6166        for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6167            assert!(detector.record_rst_emitted().is_none());
6168        }
6169        assert_eq!(
6170            detector.total_rst_streams_emitted_lifetime,
6171            DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME
6172        );
6173    }
6174
6175    #[test]
6176    fn test_flood_detector_emitted_rst_cap_triggers_made_you_reset() {
6177        // CVE-2025-8671 MadeYouReset: unbounded server-emitted RST_STREAM is
6178        // a DoS vector equivalent to Rapid Reset with the emission direction
6179        // flipped. Crossing the ceiling must surface a EnhanceYourCalm
6180        // violation so the caller can GOAWAY.
6181        let mut detector = H2FloodDetector::default();
6182        for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6183            assert!(detector.record_rst_emitted().is_none());
6184        }
6185        let violation = detector
6186            .record_rst_emitted()
6187            .expect("emitting past the cap should produce a violation");
6188        assert!(matches!(
6189            violation,
6190            H2FloodViolation {
6191                error: H2Error::EnhanceYourCalm,
6192                reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
6193                ..
6194            }
6195        ));
6196        assert_eq!(violation.count, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME + 1);
6197        assert_eq!(violation.threshold, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME);
6198    }
6199
6200    #[test]
6201    fn test_flood_detector_emitted_rst_counter_does_not_decay() {
6202        // Unlike the windowed rst_stream_count, the emitted lifetime counter
6203        // is strictly monotonic — a patient attacker cannot reset it by
6204        // waiting out a window. maybe_reset_window must NOT touch it.
6205        let mut detector = H2FloodDetector::default();
6206        for _ in 0..10 {
6207            detector.record_rst_emitted();
6208        }
6209        detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6210        // Force a window reset through check_flood.
6211        let _ = detector.check_flood();
6212        assert_eq!(detector.total_rst_streams_emitted_lifetime, 10);
6213    }
6214
6215    /// Every violation kind must carry a metric_key under the agreed
6216    /// `h2.flood.violation.*` namespace, and the keys must be unique. The
6217    /// statsd counter at `handle_flood_violation` reads `violation.metric_key`
6218    /// directly — drift between the construction site and the metric name
6219    /// would silently lose alerting on a CVE mitigation.
6220    #[test]
6221    fn test_flood_violation_metric_keys_are_unique_and_namespaced() {
6222        // Helper: run `record_rst_lifetime` until it trips, returning the metric_key.
6223        fn key_from_rst_lifetime(response_started: bool) -> &'static str {
6224            let mut detector = H2FloodDetector::default();
6225            loop {
6226                if let Some(v) = detector.record_rst_lifetime(response_started) {
6227                    return v.metric_key;
6228                }
6229            }
6230        }
6231
6232        // Helper: run `record_rst_emitted` until it trips, returning the metric_key.
6233        fn key_from_rst_emitted() -> &'static str {
6234            let mut detector = H2FloodDetector::default();
6235            loop {
6236                if let Some(v) = detector.record_rst_emitted() {
6237                    return v.metric_key;
6238                }
6239            }
6240        }
6241
6242        // Helper: drive a single `check_flood` counter past its threshold.
6243        fn key_from_check_flood(setup: impl FnOnce(&mut H2FloodDetector)) -> &'static str {
6244            let mut detector = H2FloodDetector::default();
6245            setup(&mut detector);
6246            detector
6247                .check_flood()
6248                .expect("setup should always trip a flood")
6249                .metric_key
6250        }
6251
6252        let keys: [&'static str; 12] = [
6253            // Lifetime methods on the detector itself.
6254            key_from_rst_lifetime(true),
6255            key_from_rst_lifetime(false),
6256            key_from_rst_emitted(),
6257            // `check_flood` arms.
6258            key_from_check_flood(|d| d.rst_stream_count = u32::MAX),
6259            key_from_check_flood(|d| d.ping_count = u32::MAX),
6260            key_from_check_flood(|d| d.total_ping_received_lifetime = u32::MAX),
6261            key_from_check_flood(|d| d.settings_count = u32::MAX),
6262            key_from_check_flood(|d| d.total_settings_received_lifetime = u32::MAX),
6263            key_from_check_flood(|d| d.empty_data_count = u32::MAX),
6264            key_from_check_flood(|d| d.continuation_count = u32::MAX),
6265            key_from_check_flood(|d| d.accumulated_header_size = u32::MAX),
6266            key_from_check_flood(|d| d.glitch_count = u32::MAX),
6267        ];
6268
6269        for key in keys {
6270            assert!(
6271                key.starts_with("h2.flood.violation."),
6272                "metric key {key} is missing the h2.flood.violation. prefix",
6273            );
6274        }
6275        let mut deduped = keys.to_vec();
6276        deduped.sort_unstable();
6277        deduped.dedup();
6278        assert_eq!(
6279            deduped.len(),
6280            keys.len(),
6281            "metric keys must be unique across violation kinds; collisions: {keys:?}",
6282        );
6283    }
6284
6285    /// All four `metric_for_*` helpers must yield distinct, namespaced keys for
6286    /// every RFC 9113 §7 error code. The macro behind them uses `concat!`, so a
6287    /// new H2Error variant fails the build inside the macro — but a typo in
6288    /// the helper prefix would silently land. Walk every (direction × kind)
6289    /// pair and dedupe the set.
6290    /// `h2_frame_rx_metric_key` must yield a distinct `&'static str` per
6291    /// `Frame::*` variant. The single dispatch site in `handle_frame` reads
6292    /// from this helper, so a typo or duplicate would silently clobber the
6293    /// frame-mix dashboard. Asserting the literal set lets us compare against
6294    /// `doc/configure.md` and the RFC 9113 §6 frame catalogue without
6295    /// reconstructing every Frame variant in the test.
6296    #[test]
6297    fn test_h2_frame_rx_metric_keys_are_unique_and_namespaced() {
6298        // Update this list whenever a new Frame variant is added — the helper
6299        // match is also exhaustive, so the build will already break there
6300        // before anyone notices the test missing a key.
6301        let expected: [&'static str; 11] = [
6302            "h2.frames.rx.data",
6303            "h2.frames.rx.headers",
6304            "h2.frames.rx.push_promise",
6305            "h2.frames.rx.priority",
6306            "h2.frames.rx.rst_stream",
6307            "h2.frames.rx.settings",
6308            "h2.frames.rx.ping",
6309            "h2.frames.rx.goaway",
6310            "h2.frames.rx.window_update",
6311            "h2.frames.rx.continuation",
6312            "h2.frames.rx.unknown",
6313        ];
6314
6315        for key in expected {
6316            assert!(
6317                key.starts_with("h2.frames.rx."),
6318                "metric key {key} is missing the h2.frames.rx. prefix",
6319            );
6320        }
6321        let mut deduped = expected.to_vec();
6322        deduped.sort_unstable();
6323        deduped.dedup();
6324        assert_eq!(
6325            deduped.len(),
6326            expected.len(),
6327            "frame-rx metric keys must be unique; collisions in: {expected:?}",
6328        );
6329
6330        // Spot-check the helper for the one variant we can construct without
6331        // borrowing into a frame body — `Frame::Unknown(u8)` is just a tag.
6332        assert_eq!(
6333            h2_frame_rx_metric_key(&Frame::Unknown(42)),
6334            "h2.frames.rx.unknown",
6335        );
6336    }
6337
6338    #[test]
6339    fn test_per_error_code_metric_keys_are_unique_and_namespaced() {
6340        const ALL_ERRORS: [H2Error; 14] = [
6341            H2Error::NoError,
6342            H2Error::ProtocolError,
6343            H2Error::InternalError,
6344            H2Error::FlowControlError,
6345            H2Error::SettingsTimeout,
6346            H2Error::StreamClosed,
6347            H2Error::FrameSizeError,
6348            H2Error::RefusedStream,
6349            H2Error::Cancel,
6350            H2Error::CompressionError,
6351            H2Error::ConnectError,
6352            H2Error::EnhanceYourCalm,
6353            H2Error::InadequateSecurity,
6354            H2Error::HTTP11Required,
6355        ];
6356
6357        let mut keys: Vec<&'static str> = Vec::new();
6358        for error in ALL_ERRORS {
6359            let code = error as u32;
6360            keys.push(metric_for_goaway_sent(error));
6361            keys.push(metric_for_goaway_received(code));
6362            keys.push(metric_for_rst_stream_sent(error));
6363            keys.push(metric_for_rst_stream_received(code));
6364        }
6365        // …plus the four `unknown_error` fallbacks for codes outside RFC 9113 §7.
6366        let unknown_code = 0xff;
6367        assert!(H2Error::try_from(unknown_code).is_err());
6368        keys.push(metric_for_goaway_received(unknown_code));
6369        keys.push(metric_for_rst_stream_received(unknown_code));
6370        // …and the dedicated Rapid Reset signature counter.
6371        keys.push(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START);
6372
6373        for key in &keys {
6374            assert!(
6375                key.starts_with("h2.goaway.sent.")
6376                    || key.starts_with("h2.goaway.received.")
6377                    || key.starts_with("h2.rst_stream.sent.")
6378                    || key.starts_with("h2.rst_stream.received."),
6379                "metric key {key} does not match a known per-error-code namespace",
6380            );
6381        }
6382        let mut deduped = keys.clone();
6383        deduped.sort_unstable();
6384        deduped.dedup();
6385        assert_eq!(
6386            deduped.len(),
6387            keys.len(),
6388            "per-error-code metric keys must be unique; collisions in: {keys:?}",
6389        );
6390    }
6391
6392    #[test]
6393    fn test_flood_detector_response_started_rst_not_abusive() {
6394        // When the backend response has begun, the RST is cheap for us
6395        // too — it only bumps the generic lifetime counter.
6396        let mut detector = H2FloodDetector::default();
6397        for _ in 0..(DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100) {
6398            assert!(detector.record_rst_lifetime(true).is_none());
6399        }
6400        assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
6401        assert_eq!(
6402            detector.total_rst_received_lifetime,
6403            DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100
6404        );
6405    }
6406
6407    #[test]
6408    fn test_flood_detector_default_matches_new_default() {
6409        let from_default = H2FloodDetector::default();
6410        let from_new = H2FloodDetector::new(H2FloodConfig::default());
6411
6412        assert_eq!(from_default.rst_stream_count, from_new.rst_stream_count);
6413        assert_eq!(from_default.ping_count, from_new.ping_count);
6414        assert_eq!(from_default.settings_count, from_new.settings_count);
6415        assert_eq!(from_default.empty_data_count, from_new.empty_data_count);
6416        assert_eq!(from_default.continuation_count, from_new.continuation_count);
6417        assert_eq!(
6418            from_default.accumulated_header_size,
6419            from_new.accumulated_header_size
6420        );
6421        assert_eq!(from_default.glitch_count, from_new.glitch_count);
6422        assert_eq!(from_default.config, from_new.config);
6423    }
6424
6425    // ── Prioriser ────────────────────────────────────────────────────────
6426
6427    #[test]
6428    fn test_prioriser_defaults_for_unknown_stream() {
6429        let p = Prioriser::default();
6430        // Unknown stream -> RFC 9218 defaults: urgency 3, incremental false
6431        assert_eq!(p.get(&1), (3, false));
6432        assert_eq!(p.get(&999), (3, false));
6433    }
6434
6435    #[test]
6436    fn test_prioriser_push_rfc9218_and_get() {
6437        let mut p = Prioriser::default();
6438
6439        let invalid = p.push_priority(
6440            1,
6441            parser::PriorityPart::Rfc9218 {
6442                urgency: 0,
6443                incremental: true,
6444            },
6445        );
6446        assert!(!invalid);
6447        assert_eq!(p.get(&1), (0, true));
6448
6449        let invalid = p.push_priority(
6450            3,
6451            parser::PriorityPart::Rfc9218 {
6452                urgency: 7,
6453                incremental: false,
6454            },
6455        );
6456        assert!(!invalid);
6457        assert_eq!(p.get(&3), (7, false));
6458    }
6459
6460    #[test]
6461    fn test_prioriser_urgency_clamped_to_7() {
6462        let mut p = Prioriser::default();
6463
6464        p.push_priority(
6465            1,
6466            parser::PriorityPart::Rfc9218 {
6467                urgency: 255,
6468                incremental: false,
6469            },
6470        );
6471        assert_eq!(p.get(&1), (7, false));
6472    }
6473
6474    #[test]
6475    fn test_prioriser_update_priority() {
6476        let mut p = Prioriser::default();
6477
6478        p.push_priority(
6479            1,
6480            parser::PriorityPart::Rfc9218 {
6481                urgency: 3,
6482                incremental: false,
6483            },
6484        );
6485        assert_eq!(p.get(&1), (3, false));
6486
6487        // Update same stream
6488        p.push_priority(
6489            1,
6490            parser::PriorityPart::Rfc9218 {
6491                urgency: 1,
6492                incremental: true,
6493            },
6494        );
6495        assert_eq!(p.get(&1), (1, true));
6496    }
6497
6498    #[test]
6499    fn test_prioriser_remove() {
6500        let mut p = Prioriser::default();
6501
6502        p.push_priority(
6503            1,
6504            parser::PriorityPart::Rfc9218 {
6505                urgency: 0,
6506                incremental: true,
6507            },
6508        );
6509        assert_eq!(p.get(&1), (0, true));
6510
6511        p.remove(&1);
6512        // After removal, falls back to defaults
6513        assert_eq!(p.get(&1), (3, false));
6514    }
6515
6516    #[test]
6517    fn test_prioriser_rfc7540_self_dependency() {
6518        let mut p = Prioriser::default();
6519
6520        // Self-dependency should return true (invalid)
6521        let invalid = p.push_priority(
6522            5,
6523            parser::PriorityPart::Rfc7540 {
6524                stream_dependency: parser::StreamDependency {
6525                    exclusive: false,
6526                    stream_id: 5, // same as stream_id
6527                },
6528                weight: 16,
6529            },
6530        );
6531        assert!(invalid);
6532    }
6533
6534    #[test]
6535    fn test_prioriser_rfc7540_valid_dependency() {
6536        let mut p = Prioriser::default();
6537
6538        // Non-self dependency is valid (but ignored for scheduling)
6539        let invalid = p.push_priority(
6540            5,
6541            parser::PriorityPart::Rfc7540 {
6542                stream_dependency: parser::StreamDependency {
6543                    exclusive: false,
6544                    stream_id: 3, // different stream
6545                },
6546                weight: 16,
6547            },
6548        );
6549        assert!(!invalid);
6550        // Still returns defaults since RFC 7540 priority is ignored
6551        assert_eq!(p.get(&5), (3, false));
6552    }
6553
6554    #[test]
6555    fn test_prioriser_max_entries_cap() {
6556        let mut p = Prioriser::default();
6557
6558        // Fill up to MAX_PRIORITIES
6559        for i in 0..MAX_PRIORITIES as u32 {
6560            let stream_id = i * 2 + 1; // odd stream IDs
6561            p.push_priority(
6562                stream_id,
6563                parser::PriorityPart::Rfc9218 {
6564                    urgency: (i % 8) as u8,
6565                    incremental: false,
6566                },
6567            );
6568        }
6569
6570        // Next insert for a new stream should be silently rejected
6571        let next_id = (MAX_PRIORITIES as u32) * 2 + 1;
6572        let invalid = p.push_priority(
6573            next_id,
6574            parser::PriorityPart::Rfc9218 {
6575                urgency: 0,
6576                incremental: true,
6577            },
6578        );
6579        assert!(!invalid); // not a protocol error, just silently dropped
6580        assert_eq!(p.get(&next_id), (3, false)); // defaults, not stored
6581    }
6582
6583    #[test]
6584    fn test_prioriser_update_existing_at_cap() {
6585        let mut p = Prioriser::default();
6586
6587        // Fill to cap
6588        for i in 0..MAX_PRIORITIES as u32 {
6589            p.push_priority(
6590                i * 2 + 1,
6591                parser::PriorityPart::Rfc9218 {
6592                    urgency: 3,
6593                    incremental: false,
6594                },
6595            );
6596        }
6597
6598        // Updating an existing entry should still work even at cap
6599        p.push_priority(
6600            1,
6601            parser::PriorityPart::Rfc9218 {
6602                urgency: 0,
6603                incremental: true,
6604            },
6605        );
6606        assert_eq!(p.get(&1), (0, true));
6607    }
6608
6609    #[test]
6610    fn test_prioriser_guarded_accepts_open_stream() {
6611        let mut p = Prioriser::default();
6612        let mut open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
6613        open.insert(3, 0);
6614        let invalid = p.push_priority_guarded(
6615            3,
6616            parser::PriorityPart::Rfc9218 {
6617                urgency: 1,
6618                incremental: false,
6619            },
6620            7,
6621            &open,
6622        );
6623        assert!(!invalid);
6624        assert_eq!(p.get(&3), (1, false));
6625    }
6626
6627    #[test]
6628    fn test_prioriser_guarded_accepts_idle_lookahead() {
6629        let mut p = Prioriser::default();
6630        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
6631        // Just ahead of last_stream_id, within PRIORITY_IDLE_LOOKAHEAD.
6632        let invalid = p.push_priority_guarded(
6633            105,
6634            parser::PriorityPart::Rfc9218 {
6635                urgency: 2,
6636                incremental: true,
6637            },
6638            99,
6639            &open,
6640        );
6641        assert!(!invalid);
6642        assert_eq!(p.get(&105), (2, true));
6643    }
6644
6645    #[test]
6646    fn test_prioriser_guarded_drops_far_future_stream() {
6647        let mut p = Prioriser::default();
6648        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
6649        // Beyond the 64-slot lookahead window.
6650        let invalid = p.push_priority_guarded(
6651            1_000_001,
6652            parser::PriorityPart::Rfc9218 {
6653                urgency: 0,
6654                incremental: false,
6655            },
6656            3,
6657            &open,
6658        );
6659        assert!(!invalid); // not a protocol error, just dropped
6660        // Default priority returned — no entry stored.
6661        assert_eq!(p.get(&1_000_001), (DEFAULT_URGENCY, false));
6662    }
6663
6664    #[test]
6665    fn test_prioriser_guarded_drops_closed_past_stream() {
6666        let mut p = Prioriser::default();
6667        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
6668        // Past the counter and not open = already closed. Drop.
6669        let invalid = p.push_priority_guarded(
6670            3,
6671            parser::PriorityPart::Rfc9218 {
6672                urgency: 5,
6673                incremental: false,
6674            },
6675            99,
6676            &open,
6677        );
6678        assert!(!invalid);
6679        assert_eq!(p.get(&3), (DEFAULT_URGENCY, false));
6680    }
6681
6682    #[test]
6683    fn test_prioriser_guarded_cannot_flood_with_far_ids() {
6684        // Previously an attacker could pack MAX_PRIORITIES entries by picking
6685        // far-future stream IDs. The guard rejects them before the cap helps.
6686        let mut p = Prioriser::default();
6687        let open: HashMap<StreamId, GlobalStreamId> = HashMap::new();
6688        for delta in 10_000..(10_000 + MAX_PRIORITIES as u32) {
6689            p.push_priority_guarded(
6690                delta,
6691                parser::PriorityPart::Rfc9218 {
6692                    urgency: 0,
6693                    incremental: false,
6694                },
6695                0,
6696                &open,
6697            );
6698        }
6699        assert_eq!(p.priorities.len(), 0);
6700    }
6701
6702    // ── RFC 9218 §4 round-robin rotation ───────────────────────────────
6703
6704    /// Helper: mark `stream_id` as (urgency, incremental) in the map.
6705    fn set_prio(p: &mut Prioriser, stream_id: StreamId, urgency: u8, incremental: bool) {
6706        p.push_priority(
6707            stream_id,
6708            parser::PriorityPart::Rfc9218 {
6709                urgency,
6710                incremental,
6711            },
6712        );
6713    }
6714
6715    #[test]
6716    fn test_apply_incremental_rotation_all_non_incremental_is_noop() {
6717        // Non-incremental streams keep the existing (urgency, stream_id) sort.
6718        let mut p = Prioriser::default();
6719        set_prio(&mut p, 1, 3, false);
6720        set_prio(&mut p, 3, 3, false);
6721        set_prio(&mut p, 5, 3, false);
6722
6723        let mut buf = vec![1u32, 3, 5];
6724        let count = p.apply_incremental_rotation(&mut buf);
6725        assert_eq!(count, 0);
6726        assert_eq!(buf, vec![1, 3, 5]);
6727    }
6728
6729    #[test]
6730    fn test_apply_incremental_rotation_moves_incremental_to_tail() {
6731        // Within a same-urgency bucket non-incremental must come before
6732        // incremental, each subrange staying ascending.
6733        let mut p = Prioriser::default();
6734        set_prio(&mut p, 1, 3, true);
6735        set_prio(&mut p, 3, 3, false);
6736        set_prio(&mut p, 5, 3, true);
6737        set_prio(&mut p, 7, 3, false);
6738
6739        let mut buf = vec![1u32, 3, 5, 7];
6740        let count = p.apply_incremental_rotation(&mut buf);
6741        assert_eq!(count, 2);
6742        // Non-incremental first (3, 7), then incremental (1, 5) — ascending
6743        // within each subrange before the cursor rotation.
6744        assert_eq!(buf, vec![3, 7, 1, 5]);
6745    }
6746
6747    #[test]
6748    fn test_apply_incremental_rotation_respects_urgency_buckets() {
6749        // Different urgency buckets must not be mixed.
6750        let mut p = Prioriser::default();
6751        set_prio(&mut p, 1, 0, true); // urgent incremental
6752        set_prio(&mut p, 3, 3, false); // default non-incremental
6753        set_prio(&mut p, 5, 3, true); // default incremental
6754        set_prio(&mut p, 7, 5, false); // low-priority non-incremental
6755
6756        // Input is pre-sorted by (urgency, id) as the scheduler does.
6757        let mut buf = vec![1u32, 3, 5, 7];
6758        let count = p.apply_incremental_rotation(&mut buf);
6759        assert_eq!(count, 2);
6760        // Bucket 0: [1] (alone, stays). Bucket 3: [3] non-inc, [5] inc.
6761        // Bucket 5: [7] alone. Cross-bucket order is preserved.
6762        assert_eq!(buf, vec![1, 3, 5, 7]);
6763    }
6764
6765    #[test]
6766    fn test_apply_incremental_rotation_rotates_by_cursor() {
6767        // Three same-urgency incremental streams: cursor advancement shifts
6768        // the bucket so the next pass starts after the previously fired ID.
6769        let mut p = Prioriser::default();
6770        set_prio(&mut p, 1, 3, true);
6771        set_prio(&mut p, 3, 3, true);
6772        set_prio(&mut p, 5, 3, true);
6773
6774        let base = vec![1u32, 3, 5];
6775
6776        // Pass 1: cursor is 0 (initial), so order stays 1, 3, 5.
6777        let mut buf = base.clone();
6778        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
6779        assert_eq!(buf, vec![1, 3, 5]);
6780        p.advance_incremental_cursor(Some(1));
6781
6782        // Pass 2: cursor is 1, rotate so 3 comes first.
6783        let mut buf = base.clone();
6784        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
6785        assert_eq!(buf, vec![3, 5, 1]);
6786        p.advance_incremental_cursor(Some(3));
6787
6788        // Pass 3: cursor is 3, rotate so 5 comes first.
6789        let mut buf = base.clone();
6790        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
6791        assert_eq!(buf, vec![5, 1, 3]);
6792        p.advance_incremental_cursor(Some(5));
6793
6794        // Pass 4: cursor is 5 (largest in bucket), wrap to 1.
6795        let mut buf = base;
6796        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
6797        assert_eq!(buf, vec![1, 3, 5]);
6798    }
6799
6800    #[test]
6801    fn test_apply_incremental_rotation_cursor_unknown_id() {
6802        // Cursor points at an ID no longer active (stream completed). Rotation
6803        // should still start from the smallest ID greater than the cursor.
6804        let mut p = Prioriser::default();
6805        set_prio(&mut p, 3, 3, true);
6806        set_prio(&mut p, 5, 3, true);
6807        set_prio(&mut p, 7, 3, true);
6808        p.advance_incremental_cursor(Some(4)); // 4 is not in the bucket
6809
6810        let mut buf = vec![3u32, 5, 7];
6811        assert_eq!(p.apply_incremental_rotation(&mut buf), 3);
6812        assert_eq!(buf, vec![5, 7, 3]);
6813    }
6814
6815    #[test]
6816    fn test_apply_incremental_rotation_single_stream_buckets() {
6817        // Single-stream buckets are a degenerate fast path: no reordering.
6818        let mut p = Prioriser::default();
6819        set_prio(&mut p, 1, 1, true);
6820        set_prio(&mut p, 3, 2, false);
6821        set_prio(&mut p, 5, 3, true);
6822
6823        let mut buf = vec![1u32, 3, 5];
6824        let count = p.apply_incremental_rotation(&mut buf);
6825        assert_eq!(count, 2);
6826        assert_eq!(buf, vec![1, 3, 5]);
6827    }
6828
6829    #[test]
6830    fn test_advance_incremental_cursor_none_is_noop() {
6831        // If no incremental stream fires (only non-incremental served), the
6832        // cursor must stay put so fairness is preserved for the next pass.
6833        let mut p = Prioriser::default();
6834        p.advance_incremental_cursor(Some(5));
6835        p.advance_incremental_cursor(None);
6836        assert_eq!(p.incremental_cursor, 5);
6837    }
6838
6839    #[test]
6840    fn test_apply_incremental_rotation_mixed_bucket_with_cursor() {
6841        // Same-urgency bucket with a mix: non-inc served first in ascending
6842        // order, then the incremental tail rotated by cursor.
6843        let mut p = Prioriser::default();
6844        set_prio(&mut p, 1, 3, true);
6845        set_prio(&mut p, 3, 3, false);
6846        set_prio(&mut p, 5, 3, true);
6847        set_prio(&mut p, 7, 3, false);
6848        set_prio(&mut p, 9, 3, true);
6849        p.advance_incremental_cursor(Some(5));
6850
6851        let mut buf = vec![1u32, 3, 5, 7, 9];
6852        let count = p.apply_incremental_rotation(&mut buf);
6853        assert_eq!(count, 3);
6854        // Non-inc (3, 7) first, then incremental rotated: cursor 5 means
6855        // next-after-5 = 9, then 1, then 5 (wrap).
6856        assert_eq!(buf, vec![3, 7, 9, 1, 5]);
6857    }
6858
6859    // ── H2FlowControl ───────────────────────────────────────────────────
6860
6861    #[test]
6862    fn test_flow_control_initial_state() {
6863        let fc = H2FlowControl {
6864            window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
6865            received_bytes_since_update: 0,
6866            pending_window_updates: HashMap::new(),
6867        };
6868        assert_eq!(fc.window, 65535);
6869        assert_eq!(fc.received_bytes_since_update, 0);
6870        assert!(fc.pending_window_updates.is_empty());
6871    }
6872
6873    #[test]
6874    fn test_flow_control_window_update_coalescing() {
6875        let mut updates: HashMap<u32, u32> = HashMap::new();
6876
6877        // First update for stream 1
6878        updates.insert(1, 1000);
6879        assert_eq!(*updates.get(&1).unwrap(), 1000);
6880
6881        // Coalesce second update for same stream
6882        if let Some(existing) = updates.get_mut(&1) {
6883            *existing = existing.saturating_add(500).min(i32::MAX as u32);
6884        }
6885        assert_eq!(*updates.get(&1).unwrap(), 1500);
6886
6887        // Different stream gets its own entry
6888        updates.insert(3, 2000);
6889        assert_eq!(updates.len(), 2);
6890        assert_eq!(*updates.get(&3).unwrap(), 2000);
6891    }
6892
6893    #[test]
6894    fn test_flow_control_window_update_saturation() {
6895        let mut updates: HashMap<u32, u32> = HashMap::new();
6896
6897        // Insert near max and coalesce — should saturate to i32::MAX
6898        let max_increment = i32::MAX as u32;
6899        updates.insert(1, max_increment - 100);
6900        if let Some(existing) = updates.get_mut(&1) {
6901            *existing = existing.saturating_add(200).min(max_increment);
6902        }
6903        assert_eq!(*updates.get(&1).unwrap(), max_increment);
6904    }
6905
6906    #[test]
6907    fn test_flow_control_connection_window_can_go_negative() {
6908        // RFC 9113 §6.9.2: connection-level window can go negative
6909        let mut fc = H2FlowControl {
6910            window: 100,
6911            received_bytes_since_update: 0,
6912            pending_window_updates: HashMap::new(),
6913        };
6914
6915        // Simulate consuming more than available
6916        fc.window -= 200;
6917        assert_eq!(fc.window, -100);
6918    }
6919
6920    // ── H2FloodConfig ───────────────────────────────────────────────────
6921
6922    #[test]
6923    fn test_flood_config_default_values() {
6924        let config = H2FloodConfig::default();
6925        assert_eq!(config.max_rst_stream_per_window, 100);
6926        assert_eq!(config.max_ping_per_window, 100);
6927        assert_eq!(config.max_settings_per_window, 50);
6928        assert_eq!(config.max_empty_data_per_window, 100);
6929        assert_eq!(config.max_continuation_frames, 20);
6930        assert_eq!(config.max_glitch_count, 100);
6931        assert_eq!(config.max_rst_stream_lifetime, 10_000);
6932        assert_eq!(config.max_rst_stream_abusive_lifetime, 50);
6933        assert_eq!(config.max_header_list_size, MAX_HEADER_LIST_SIZE as u32);
6934    }
6935
6936    // ── distribute_overhead ─────────────────────────────────────────────
6937
6938    #[test]
6939    fn test_distribute_overhead_proportional() {
6940        let mut metrics = SessionMetrics::new(None);
6941        let mut overhead_bin = 1000;
6942        let mut overhead_bout = 500;
6943
6944        // Stream transferred 60% of total bytes (not last stream)
6945        distribute_overhead(
6946            &mut metrics,
6947            &mut overhead_bin,
6948            &mut overhead_bout,
6949            (600, 300),  // stream_bytes
6950            (1000, 500), // total_bytes
6951            2,           // active_streams
6952            false,       // is_last_stream
6953        );
6954
6955        assert_eq!(metrics.bin, 600); // 60% of 1000
6956        assert_eq!(metrics.bout, 300); // 60% of 500
6957        assert_eq!(overhead_bin, 400); // 1000 - 600
6958        assert_eq!(overhead_bout, 200); // 500 - 300
6959    }
6960
6961    #[test]
6962    fn test_distribute_overhead_even_split_when_no_bytes() {
6963        let mut metrics = SessionMetrics::new(None);
6964        let mut overhead_bin = 100;
6965        let mut overhead_bout = 200;
6966
6967        // No bytes transferred -> even distribution (not last stream)
6968        distribute_overhead(
6969            &mut metrics,
6970            &mut overhead_bin,
6971            &mut overhead_bout,
6972            (0, 0), // stream_bytes
6973            (0, 0), // total_bytes
6974            4,      // active_streams
6975            false,  // is_last_stream
6976        );
6977
6978        assert_eq!(metrics.bin, 25); // 100 / 4
6979        assert_eq!(metrics.bout, 50); // 200 / 4
6980        assert_eq!(overhead_bin, 75);
6981        assert_eq!(overhead_bout, 150);
6982    }
6983
6984    #[test]
6985    fn test_distribute_overhead_clamps_to_remaining() {
6986        let mut metrics = SessionMetrics::new(None);
6987        let mut overhead_bin = 10;
6988        let mut overhead_bout = 10;
6989
6990        // Stream claims 100% of bytes but overhead is small (last stream)
6991        distribute_overhead(
6992            &mut metrics,
6993            &mut overhead_bin,
6994            &mut overhead_bout,
6995            (1000, 1000), // stream_bytes
6996            (1000, 1000), // total_bytes
6997            1,            // active_streams
6998            true,         // is_last_stream
6999        );
7000
7001        assert_eq!(metrics.bin, 10);
7002        assert_eq!(metrics.bout, 10);
7003        assert_eq!(overhead_bin, 0);
7004        assert_eq!(overhead_bout, 0);
7005    }
7006
7007    #[test]
7008    fn test_distribute_overhead_zero_active_streams() {
7009        let mut metrics = SessionMetrics::new(None);
7010        let mut overhead_bin = 100;
7011        let mut overhead_bout = 100;
7012
7013        // 0 active streams (edge case) — last stream gets all remainder
7014        distribute_overhead(
7015            &mut metrics,
7016            &mut overhead_bin,
7017            &mut overhead_bout,
7018            (0, 0),
7019            (0, 0),
7020            0,
7021            true,
7022        );
7023
7024        assert_eq!(metrics.bin, 100); // last stream gets all remaining
7025        assert_eq!(metrics.bout, 100);
7026        assert_eq!(overhead_bin, 0);
7027        assert_eq!(overhead_bout, 0);
7028    }
7029
7030    #[test]
7031    fn test_distribute_overhead_last_stream_gets_remainder() {
7032        let mut metrics1 = SessionMetrics::new(None);
7033        let mut metrics2 = SessionMetrics::new(None);
7034        let mut overhead_bin = 120;
7035        let mut overhead_bout = 120;
7036
7037        // First stream (not last): gets proportional share
7038        distribute_overhead(
7039            &mut metrics1,
7040            &mut overhead_bin,
7041            &mut overhead_bout,
7042            (100, 100), // stream_bytes
7043            (300, 300), // total_bytes
7044            3,          // active_streams
7045            false,      // is_last_stream
7046        );
7047
7048        let remaining_bin = overhead_bin;
7049        let remaining_bout = overhead_bout;
7050
7051        // Last stream: gets ALL remaining overhead (no rounding loss)
7052        distribute_overhead(
7053            &mut metrics2,
7054            &mut overhead_bin,
7055            &mut overhead_bout,
7056            (100, 100), // stream_bytes
7057            (300, 300), // total_bytes
7058            3,          // active_streams
7059            true,       // is_last_stream
7060        );
7061
7062        assert_eq!(metrics2.bin, remaining_bin);
7063        assert_eq!(metrics2.bout, remaining_bout);
7064        assert_eq!(overhead_bin, 0, "no remainder bytes should be lost");
7065        assert_eq!(overhead_bout, 0, "no remainder bytes should be lost");
7066    }
7067
7068    // ── H2FlowControl (additional edge cases) ─────────────────────────
7069
7070    #[test]
7071    fn test_flow_control_queue_window_update_cap() {
7072        // Verify DEFAULT_MAX_PENDING_WINDOW_UPDATES reflects 1 + 4*MAX_CONCURRENT_STREAMS
7073        assert_eq!(DEFAULT_MAX_PENDING_WINDOW_UPDATES, 1 + 100 * 4);
7074
7075        // Simulate queue reaching capacity
7076        let cap = DEFAULT_MAX_PENDING_WINDOW_UPDATES;
7077        let mut updates: HashMap<u32, u32> = HashMap::new();
7078        for i in 0..cap as u32 {
7079            updates.insert(i, 1000);
7080        }
7081        assert_eq!(updates.len(), cap);
7082
7083        // A new stream ID beyond capacity should be rejected
7084        let next_stream = cap as u32;
7085        let at_cap = updates.len() >= cap;
7086        assert!(at_cap);
7087        assert!(!updates.contains_key(&next_stream));
7088
7089        // Verify custom max_concurrent_streams produces proportional cap
7090        let custom_cap = 1 + 500_usize * 4;
7091        assert_eq!(custom_cap, 2001);
7092    }
7093
7094    #[test]
7095    fn test_h2_connection_config_defaults() {
7096        let config = H2ConnectionConfig::default();
7097        assert_eq!(config.initial_connection_window, ENLARGED_CONNECTION_WINDOW);
7098        assert_eq!(
7099            config.max_concurrent_streams,
7100            DEFAULT_MAX_CONCURRENT_STREAMS
7101        );
7102        assert_eq!(config.stream_shrink_ratio, 2);
7103    }
7104
7105    #[test]
7106    fn test_h2_connection_config_clamp_window_lower_bound() {
7107        // Below minimum: clamped to DEFAULT_INITIAL_WINDOW_SIZE (65535)
7108        let config = H2ConnectionConfig::new(100, 100, 2);
7109        assert_eq!(
7110            config.initial_connection_window,
7111            DEFAULT_INITIAL_WINDOW_SIZE
7112        );
7113    }
7114
7115    #[test]
7116    fn test_h2_connection_config_clamp_window_upper_bound() {
7117        // Above maximum: clamped to FLOW_CONTROL_MAX_WINDOW (2^31-1)
7118        let config = H2ConnectionConfig::new(u32::MAX, 100, 2);
7119        assert_eq!(config.initial_connection_window, FLOW_CONTROL_MAX_WINDOW);
7120    }
7121
7122    #[test]
7123    fn test_h2_connection_config_clamp_window_exact_minimum() {
7124        // Exactly minimum: no clamping, no zero-increment WINDOW_UPDATE risk
7125        let config = H2ConnectionConfig::new(DEFAULT_INITIAL_WINDOW_SIZE, 100, 2);
7126        assert_eq!(
7127            config.initial_connection_window,
7128            DEFAULT_INITIAL_WINDOW_SIZE
7129        );
7130        // Increment to send would be 0 — the code guards this with `if increment > 0`
7131        let increment = config
7132            .initial_connection_window
7133            .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
7134        assert_eq!(increment, 0);
7135    }
7136
7137    #[test]
7138    fn test_h2_connection_config_clamp_shrink_ratio() {
7139        // Below minimum: clamped to 2 (1 would defeat recycling)
7140        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, 0);
7141        assert_eq!(config.stream_shrink_ratio, 2);
7142        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, 1);
7143        assert_eq!(config.stream_shrink_ratio, 2);
7144    }
7145
7146    #[test]
7147    fn test_h2_connection_config_clamp_concurrent_streams() {
7148        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 0, 2);
7149        assert_eq!(config.max_concurrent_streams, 1);
7150    }
7151
7152    #[test]
7153    fn test_h2_connection_config_from_optional_uses_defaults() {
7154        let config = H2ConnectionConfig::from_optional(None, None, None);
7155        let defaults = H2ConnectionConfig::default();
7156        assert_eq!(config, defaults);
7157    }
7158
7159    #[test]
7160    fn test_h2_connection_config_from_optional_overrides() {
7161        let config = H2ConnectionConfig::from_optional(Some(2_000_000), Some(500), Some(4));
7162        assert_eq!(config.initial_connection_window, 2_000_000);
7163        assert_eq!(config.max_concurrent_streams, 500);
7164        assert_eq!(config.stream_shrink_ratio, 4);
7165    }
7166
7167    #[test]
7168    fn test_flow_control_window_settings_change_negative() {
7169        // RFC 9113 §6.9.2: A change to SETTINGS_INITIAL_WINDOW_SIZE can cause
7170        // the flow-control window to become negative.
7171        let mut fc = H2FlowControl {
7172            window: 100,
7173            received_bytes_since_update: 0,
7174            pending_window_updates: HashMap::new(),
7175        };
7176
7177        // Simulate SETTINGS_INITIAL_WINDOW_SIZE reduction:
7178        // old_initial = 65535, new_initial = 10 => delta = 10 - 65535 = -65525
7179        let old_initial: i32 = DEFAULT_INITIAL_WINDOW_SIZE as i32;
7180        let new_initial: i32 = 10;
7181        let delta = new_initial - old_initial; // -65525
7182        fc.window += delta;
7183
7184        assert!(
7185            fc.window < 0,
7186            "Window must be able to go negative after settings change"
7187        );
7188        assert_eq!(fc.window, 100 + (10 - 65535));
7189    }
7190
7191    #[test]
7192    fn test_flow_control_coalesce_saturates_at_max_increment() {
7193        let max_increment = i32::MAX as u32;
7194        let mut updates: HashMap<u32, u32> = HashMap::new();
7195
7196        // Insert at max and try to coalesce more
7197        updates.insert(1, max_increment);
7198        if let Some(existing) = updates.get_mut(&1) {
7199            *existing = existing.saturating_add(1000).min(max_increment);
7200        }
7201        assert_eq!(*updates.get(&1).unwrap(), max_increment);
7202    }
7203
7204    // ── H2FloodConfig (additional) ───────────────────────────────────
7205
7206    #[test]
7207    fn test_flood_config_default_matches_constants() {
7208        let config = H2FloodConfig::default();
7209        assert_eq!(
7210            config.max_rst_stream_per_window,
7211            DEFAULT_MAX_RST_STREAM_PER_WINDOW
7212        );
7213        assert_eq!(config.max_ping_per_window, DEFAULT_MAX_PING_PER_WINDOW);
7214        assert_eq!(
7215            config.max_settings_per_window,
7216            DEFAULT_MAX_SETTINGS_PER_WINDOW
7217        );
7218        assert_eq!(
7219            config.max_empty_data_per_window,
7220            DEFAULT_MAX_EMPTY_DATA_PER_WINDOW
7221        );
7222        assert_eq!(
7223            config.max_continuation_frames,
7224            DEFAULT_MAX_CONTINUATION_FRAMES
7225        );
7226        assert_eq!(config.max_glitch_count, DEFAULT_MAX_GLITCH_COUNT);
7227    }
7228
7229    #[test]
7230    fn test_flood_config_equality() {
7231        let config_a = H2FloodConfig::default();
7232        let config_b = H2FloodConfig::default();
7233        assert_eq!(config_a, config_b);
7234
7235        let config_c = H2FloodConfig {
7236            max_rst_stream_per_window: 1,
7237            ..H2FloodConfig::default()
7238        };
7239        assert_ne!(config_a, config_c);
7240    }
7241
7242    // ── distribute_overhead (additional edge cases) ───────────────────
7243
7244    #[test]
7245    fn test_distribute_overhead_asymmetric_in_out() {
7246        let mut metrics = SessionMetrics::new(None);
7247        let mut overhead_bin = 1000;
7248        let mut overhead_bout = 1000;
7249
7250        // Stream transferred 100% inbound, 0% outbound (not last stream)
7251        distribute_overhead(
7252            &mut metrics,
7253            &mut overhead_bin,
7254            &mut overhead_bout,
7255            (500, 0),   // stream_bytes
7256            (500, 100), // total_bytes
7257            2,          // active_streams
7258            false,      // is_last_stream
7259        );
7260
7261        assert_eq!(metrics.bin, 1000); // 100% of inbound overhead
7262        assert_eq!(metrics.bout, 0); // 0% of outbound overhead
7263        assert_eq!(overhead_bin, 0);
7264        assert_eq!(overhead_bout, 1000);
7265    }
7266
7267    #[test]
7268    fn test_distribute_overhead_many_streams_accumulate() {
7269        let mut metrics = SessionMetrics::new(None);
7270        let mut overhead_bin = 120;
7271        let mut overhead_bout = 120;
7272
7273        // Three equal streams, each calling distribute_overhead.
7274        // With is_last_stream on the third call, the last stream gets all
7275        // remaining overhead, so no rounding loss occurs.
7276        //   call 1: 120 * 100/300 = 40 -> remaining 80
7277        //   call 2:  80 * 100/300 = 26 -> remaining 54
7278        //   call 3: last stream gets all remaining = 54
7279        // Total distributed: 40 + 26 + 54 = 120 (no loss)
7280        for i in 0..3 {
7281            distribute_overhead(
7282                &mut metrics,
7283                &mut overhead_bin,
7284                &mut overhead_bout,
7285                (100, 100), // stream_bytes
7286                (300, 300), // total_bytes
7287                3,          // active_streams
7288                i == 2,     // is_last_stream on final call
7289            );
7290        }
7291
7292        assert_eq!(metrics.bin, 120);
7293        assert_eq!(metrics.bout, 120);
7294        // No rounding residual — last stream absorbed the remainder
7295        assert_eq!(overhead_bin, 0);
7296        assert_eq!(overhead_bout, 0);
7297    }
7298
7299    // ── Hex chunk formatting ────────────────────────────────────────────
7300
7301    /// Verify that the Vec<u8> + write!() hex formatting used in
7302    /// handle_data_frame produces output identical to format!("{:x}").
7303    #[test]
7304    fn test_hex_chunk_length_formatting() {
7305        use std::io::Write as _;
7306
7307        let cases: &[(usize, &[u8])] = &[
7308            (1, b"1"),
7309            (15, b"f"),
7310            (16, b"10"),
7311            (255, b"ff"),
7312            (256, b"100"),
7313            (4096, b"1000"),
7314            (65535, b"ffff"),
7315            (65536, b"10000"),
7316        ];
7317
7318        for &(payload_len, expected) in cases {
7319            let mut buf = Vec::with_capacity(16);
7320            let _ = write!(buf, "{payload_len:x}");
7321            assert_eq!(
7322                buf, expected,
7323                "hex formatting mismatch for payload_len={payload_len}"
7324            );
7325        }
7326
7327        // usize::MAX tested separately to avoid temporary lifetime issue
7328        let max_expected = format!("{:x}", usize::MAX);
7329        let mut buf = Vec::with_capacity(16);
7330        let _ = write!(buf, "{:x}", usize::MAX);
7331        assert_eq!(buf, max_expected.as_bytes());
7332    }
7333
7334    // ── Stream-ID allocation / exhaustion ──────────────────────────────────
7335
7336    /// A fresh client connection starts with `last_stream_id == 0`. The first
7337    /// call MUST issue stream `1` (odd, RFC 9113 §5.1.1) and advance the
7338    /// watermark to `2`.
7339    #[test]
7340    fn test_next_stream_id_client_first_allocation() {
7341        let (issued, next) = next_stream_id(0, true).expect("fresh client must allocate");
7342        assert_eq!(issued, 1);
7343        assert_eq!(next, 2);
7344    }
7345
7346    /// Client allocation yields strictly increasing odd identifiers
7347    /// (1, 3, 5, ...) as required by RFC 9113 §5.1.1.
7348    #[test]
7349    fn test_next_stream_id_client_sequence_is_odd_and_monotonic() {
7350        let mut last = 0u32;
7351        let mut issued_ids = Vec::with_capacity(8);
7352        for _ in 0..8 {
7353            let (id, next) = next_stream_id(last, true).expect("unexhausted");
7354            assert_eq!(id & 1, 1, "client stream ids must be odd (RFC 9113 §5.1.1)");
7355            assert!(issued_ids.last().is_none_or(|prev: &u32| id > *prev));
7356            issued_ids.push(id);
7357            last = next;
7358        }
7359        assert_eq!(issued_ids, vec![1, 3, 5, 7, 9, 11, 13, 15]);
7360    }
7361
7362    /// Server-side allocation yields even identifiers. The helper
7363    /// convention is `watermark - 2` for server, `watermark - 1` for client,
7364    /// so both sides share the same monotonically-increasing even watermark.
7365    /// Sōzu never server-pushes, but the helper must be symmetric so push
7366    /// could be enabled without a regression.
7367    #[test]
7368    fn test_next_stream_id_server_is_even() {
7369        // `last = 2` means the most recent allocation advanced the watermark
7370        // to 2; server then issues `2 - 2 = 0`. This is an artefact of the
7371        // shared watermark and only matters in tests — server never uses it.
7372        let (issued, next) = next_stream_id(2, false).expect("server allocation");
7373        assert_eq!(issued & 1, 0, "server stream ids must be even");
7374        assert_eq!(next, 4);
7375        assert_eq!(issued, 2);
7376
7377        let (issued, next) = next_stream_id(next, false).expect("second slot");
7378        assert_eq!(issued, 4);
7379        assert_eq!(issued & 1, 0);
7380        assert_eq!(next, 6);
7381    }
7382
7383    /// The last client-issuable odd stream ID is `STREAM_ID_MAX = 0x7FFF_FFFF`.
7384    /// To issue it the watermark must advance to `STREAM_ID_MAX + 1 = 2³¹`;
7385    /// the caller therefore supplies `last = STREAM_ID_MAX - 1 = 0x7FFF_FFFE`.
7386    /// That call MUST succeed and return the max ID; the post-call watermark
7387    /// sits at `2³¹`, which is the sentinel that makes the next call fail.
7388    #[test]
7389    fn test_next_stream_id_client_final_slot_allocates() {
7390        let last = STREAM_ID_MAX - 1;
7391        let (issued, next) = next_stream_id(last, true).expect("final slot still allocates");
7392        assert_eq!(issued, STREAM_ID_MAX);
7393        assert_eq!(next, STREAM_ID_MAX + 1);
7394        // And the very next call MUST refuse rather than wrap.
7395        assert!(next_stream_id(next, true).is_none());
7396    }
7397
7398    /// Exhaustion case: once the client has issued stream ID `STREAM_ID_MAX`,
7399    /// the watermark sits at `STREAM_ID_MAX + 1`. The next request MUST return
7400    /// `None` — without this guard the helper would issue `STREAM_ID_MAX + 2`
7401    /// (wrapped down to an even id), which would (a) use the reserved
7402    /// high bit and (b) violate the odd-parity invariant for client streams.
7403    #[test]
7404    fn test_next_stream_id_client_exhausted_returns_none() {
7405        let last = STREAM_ID_MAX + 1;
7406        assert!(next_stream_id(last, true).is_none());
7407    }
7408
7409    /// Exhaustion via `checked_add` saturation: defence in depth in case a
7410    /// caller jumps `last_stream_id` close to `u32::MAX`. The helper must
7411    /// not panic nor overflow — it must return `None`.
7412    #[test]
7413    fn test_next_stream_id_saturates_near_u32_max() {
7414        assert!(next_stream_id(u32::MAX, true).is_none());
7415        assert!(next_stream_id(u32::MAX - 1, true).is_none());
7416    }
7417
7418    /// Server-side exhaustion: same guard, even-parity identifier space.
7419    #[test]
7420    fn test_next_stream_id_server_exhausted_returns_none() {
7421        let last = STREAM_ID_MAX + 1;
7422        assert!(next_stream_id(last, false).is_none());
7423    }
7424
7425    /// Regression guard: the helper must never issue a stream ID that
7426    /// exceeds `STREAM_ID_MAX` for either side, no matter where the
7427    /// watermark sits. This walks every value in a neighbourhood of the
7428    /// boundary to rule out off-by-one errors.
7429    #[test]
7430    fn test_next_stream_id_never_exceeds_stream_id_max() {
7431        for last in (STREAM_ID_MAX - 4)..=(STREAM_ID_MAX + 4) {
7432            for is_client in [true, false] {
7433                if let Some((issued, next)) = next_stream_id(last, is_client) {
7434                    assert!(
7435                        issued <= STREAM_ID_MAX,
7436                        "issued id {issued} exceeds STREAM_ID_MAX (last={last}, is_client={is_client})"
7437                    );
7438                    // `next` is the post-allocation watermark and may sit at
7439                    // STREAM_ID_MAX + 1 — the very next call must then return None.
7440                    if next > STREAM_ID_MAX {
7441                        assert!(
7442                            next_stream_id(next, is_client).is_none(),
7443                            "second call after final slot must report exhaustion"
7444                        );
7445                    }
7446                }
7447            }
7448        }
7449    }
7450
7451    /// The helper's `is_client` flag must cleanly split the ID space so that
7452    /// a client and a server peered on the same connection cannot collide.
7453    /// Given the same `last_stream_id`, the two parities must differ by 1.
7454    #[test]
7455    fn test_next_stream_id_client_server_parities_disjoint() {
7456        for last in [0u32, 2, 4, 10, 100, 1_000_000, STREAM_ID_MAX - 3] {
7457            let (client_id, _) = next_stream_id(last, true).unwrap();
7458            let (server_id, _) = next_stream_id(last, false).unwrap();
7459            assert_eq!(client_id & 1, 1);
7460            assert_eq!(server_id & 1, 0);
7461            assert_eq!(client_id.abs_diff(server_id), 1);
7462        }
7463    }
7464
7465    // ── LIFECYCLE §9 invariant 16: any_stream_id_matches ─────────────────
7466    //
7467    // Covers the iteration dispatch used by `any_stream_has_pending_back`.
7468    // Testing the probe directly against a synthetic closure keeps the
7469    // tests independent of the full `Stream` fixture (which requires a
7470    // `Pool` and a fully-built `HttpContext`).
7471
7472    #[test]
7473    fn test_any_stream_id_matches_empty_map_is_false() {
7474        let streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7475        assert!(!any_stream_id_matches(&streams, |_| true));
7476    }
7477
7478    #[test]
7479    fn test_any_stream_id_matches_all_probe_false_is_false() {
7480        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7481        streams.insert(1, 0);
7482        streams.insert(3, 1);
7483        streams.insert(5, 2);
7484        assert!(!any_stream_id_matches(&streams, |_| false));
7485    }
7486
7487    #[test]
7488    fn test_any_stream_id_matches_any_probe_true_is_true() {
7489        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7490        streams.insert(1, 0);
7491        streams.insert(3, 1);
7492        streams.insert(5, 2);
7493        // Probe is true only for GlobalStreamId == 1 (i.e. StreamId 3).
7494        assert!(any_stream_id_matches(&streams, |gid| gid == 1));
7495    }
7496
7497    #[test]
7498    fn test_any_stream_id_matches_single_entry() {
7499        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7500        streams.insert(42, 7);
7501        assert!(any_stream_id_matches(&streams, |gid| gid == 7));
7502        assert!(!any_stream_id_matches(&streams, |gid| gid == 8));
7503    }
7504
7505    #[test]
7506    fn test_any_stream_id_matches_short_circuits() {
7507        let mut streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7508        streams.insert(1, 0);
7509        streams.insert(3, 1);
7510        streams.insert(5, 2);
7511        streams.insert(7, 3);
7512        let mut calls = 0usize;
7513        let result = any_stream_id_matches(&streams, |_| {
7514            calls += 1;
7515            true
7516        });
7517        assert!(result);
7518        // `Iterator::any` short-circuits on the first `true` — so the probe
7519        // must fire at most once in this construction.
7520        assert_eq!(calls, 1);
7521    }
7522
7523    // ── LIFECYCLE §9 invariant 16: any_stream_has_pending_back ───────────
7524
7525    /// Build a minimal `Stream` for invariant-16 probing. Uses the pool
7526    /// plumbing so `back.blocks` / `back.out` exist; every other field is
7527    /// default-valued because the predicate only reads the back buffer.
7528    fn make_stream_for_invariant_16(pool: &Rc<RefCell<Pool>>, session_ulid: Ulid) -> Stream {
7529        let http_ctx = HttpContext {
7530            keep_alive_backend: true,
7531            keep_alive_frontend: true,
7532            sticky_session_found: None,
7533            method: None,
7534            authority: None,
7535            path: None,
7536            status: None,
7537            reason: None,
7538            user_agent: None,
7539            x_request_id: None,
7540            xff_chain: None,
7541            #[cfg(feature = "opentelemetry")]
7542            otel: None,
7543            closing: false,
7544            session_id: session_ulid,
7545            id: Ulid::generate(),
7546            backend_id: None,
7547            cluster_id: None,
7548            protocol: Protocol::HTTPS,
7549            public_address: "127.0.0.1:0".parse().unwrap(),
7550            session_address: None,
7551            sticky_name: String::new(),
7552            sticky_session: None,
7553            backend_address: None,
7554            tls_server_name: None,
7555            tls_cert_names: None,
7556            strict_sni_binding: false,
7557            elide_x_real_ip: false,
7558            send_x_real_ip: false,
7559            tls_version: None,
7560            tls_cipher: None,
7561            tls_alpn: None,
7562            sozu_id_header: String::from("Sozu-Id"),
7563            redirect_location: None,
7564            www_authenticate: None,
7565            original_authority: None,
7566            headers_response: Vec::new(),
7567            retry_after_seconds: None,
7568            frontend_redirect_template: None,
7569            redirect_status: None,
7570            access_log_message: None,
7571        };
7572        Stream::new(Rc::downgrade(pool), http_ctx, 65_535)
7573            .expect("pool should have capacity for two buffers")
7574    }
7575
7576    fn make_pool_for_invariant_16() -> Rc<RefCell<Pool>> {
7577        // Two buffer slots per stream (front + back), ten stream slots is
7578        // plenty for the tests below.
7579        Rc::new(RefCell::new(Pool::with_capacity(4, 20, 16_384)))
7580    }
7581
7582    #[test]
7583    fn test_any_stream_has_pending_back_empty_map_is_false() {
7584        let pool = make_pool_for_invariant_16();
7585        let ulid = Ulid::generate();
7586        let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7587        let context_streams = vec![make_stream_for_invariant_16(&pool, ulid)];
7588        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
7589    }
7590
7591    #[test]
7592    fn test_any_stream_has_pending_back_all_drained_is_false() {
7593        let pool = make_pool_for_invariant_16();
7594        let ulid = Ulid::generate();
7595        let context_streams = vec![
7596            make_stream_for_invariant_16(&pool, ulid),
7597            make_stream_for_invariant_16(&pool, ulid),
7598        ];
7599        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7600        streams_map.insert(1, 0);
7601        streams_map.insert(3, 1);
7602        // Both freshly-built streams have empty back.out and back.blocks
7603        // (Kawa::new starts with empty deques).
7604        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
7605    }
7606
7607    #[test]
7608    fn test_any_stream_has_pending_back_unknown_gid_is_false() {
7609        // LIFECYCLE invariant 16 defence-in-depth: an unknown
7610        // `GlobalStreamId` during a stream-removal race must not panic;
7611        // `.get()` must short-circuit to `false`.
7612        let pool = make_pool_for_invariant_16();
7613        let ulid = Ulid::generate();
7614        let context_streams = vec![make_stream_for_invariant_16(&pool, ulid)];
7615        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7616        // GlobalStreamId 42 is out of range for the 1-element slice above.
7617        streams_map.insert(7, 42);
7618        assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
7619    }
7620
7621    #[test]
7622    fn test_any_stream_has_pending_back_with_pending_blocks_is_true() {
7623        let pool = make_pool_for_invariant_16();
7624        let ulid = Ulid::generate();
7625        let mut stream = make_stream_for_invariant_16(&pool, ulid);
7626        // Push one dummy block — any Block variant is fine; the predicate
7627        // only checks `blocks.is_empty()`.
7628        stream.back.blocks.push_back(kawa::Block::StatusLine);
7629        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7630        streams_map.insert(1, 0);
7631        assert!(any_stream_has_pending_back(&streams_map, &[stream]));
7632    }
7633
7634    #[test]
7635    fn test_any_stream_has_pending_back_with_pending_out_is_true() {
7636        let pool = make_pool_for_invariant_16();
7637        let ulid = Ulid::generate();
7638        let mut stream = make_stream_for_invariant_16(&pool, ulid);
7639        // Non-empty out buffer with no blocks.
7640        stream
7641            .back
7642            .out
7643            .push_back(kawa::OutBlock::Store(kawa::Store::Static(b"partial frame")));
7644        let mut streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::new();
7645        streams_map.insert(1, 0);
7646        assert!(any_stream_has_pending_back(&streams_map, &[stream]));
7647    }
7648
7649    // ── ready_incremental_by_urgency mid-pass consistency ────────────────
7650    //
7651    // The full RED is in e2e and currently #[ignore]'d (timing-sensitive).
7652    // The scalar logic below pins the saturating_sub + bucket-scoped
7653    // decrement contract the scheduler at h2.rs:2412-2414 + h2.rs:2481
7654    // relies on: a same-urgency transition-to-ineligible MUST drop the
7655    // per-bucket count by exactly 1 and never underflow the u64.
7656
7657    fn make_bucket(counts: &[(u8, usize)]) -> HashMap<u8, usize> {
7658        counts.iter().copied().collect()
7659    }
7660
7661    #[test]
7662    fn ready_incremental_bucket_decrement_reduces_same_urgency_only() {
7663        let mut map = make_bucket(&[(1, 3), (3, 2)]);
7664        let urgency: u8 = 1;
7665        let is_incremental = true;
7666        // Simulate a stream in urgency=1 going ineligible mid-pass.
7667        if is_incremental {
7668            if let Some(c) = map.get_mut(&urgency) {
7669                *c = c.saturating_sub(1);
7670            }
7671        }
7672        assert_eq!(map.get(&1), Some(&2), "urgency-1 bucket must drop to 2");
7673        assert_eq!(map.get(&3), Some(&2), "urgency-3 bucket untouched");
7674    }
7675
7676    #[test]
7677    fn ready_incremental_bucket_decrement_saturates_at_zero() {
7678        let mut map = make_bucket(&[(0, 0)]);
7679        let urgency: u8 = 0;
7680        if let Some(c) = map.get_mut(&urgency) {
7681            *c = c.saturating_sub(1);
7682        }
7683        assert_eq!(map.get(&0), Some(&0), "saturating_sub must not underflow");
7684    }
7685
7686    #[test]
7687    fn ready_incremental_bucket_decrement_skipped_for_non_incremental() {
7688        let mut map = make_bucket(&[(1, 3)]);
7689        let is_incremental = false;
7690        if is_incremental {
7691            if let Some(c) = map.get_mut(&1) {
7692                *c = c.saturating_sub(1);
7693            }
7694        }
7695        assert_eq!(
7696            map.get(&1),
7697            Some(&3),
7698            "non-incremental transitions must not touch the bucket"
7699        );
7700    }
7701
7702    // ── enqueue_rst: queue / dedupe / counter / arm invariants ───────────
7703    //
7704    // `enqueue_rst_into` is the free-function primitive shared by all three
7705    // RST push sites (DATA-on-closed, refuse_stream_and_discard,
7706    // reset_stream). The method delegates; the invariants live here.
7707
7708    #[test]
7709    fn test_enqueue_rst_into_populates_queue_and_dedupe() {
7710        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
7711        let mut total: usize = 0;
7712        let mut sent: HashSet<StreamId> = HashSet::new();
7713        let mut readiness = Readiness::new();
7714
7715        let first = enqueue_rst_into(
7716            &mut pending,
7717            &mut total,
7718            &mut sent,
7719            &mut readiness,
7720            5,
7721            H2Error::ProtocolError,
7722        );
7723        assert!(first, "first call must report freshly_queued = true");
7724        // Second call for the same stream must be a no-op AND return
7725        // false so accounting in `Self::enqueue_rst` skips this case.
7726        let second = enqueue_rst_into(
7727            &mut pending,
7728            &mut total,
7729            &mut sent,
7730            &mut readiness,
7731            5,
7732            H2Error::InternalError,
7733        );
7734        assert!(
7735            !second,
7736            "second call for same stream must return freshly_queued = false"
7737        );
7738
7739        assert_eq!(pending.len(), 1, "dedupe must collapse to a single entry");
7740        assert_eq!(
7741            pending[0],
7742            (5, H2Error::ProtocolError),
7743            "the first error wins — second push is ignored"
7744        );
7745        assert_eq!(total, 1, "queued-cap counter must bump exactly once");
7746        assert!(sent.contains(&5), "rst_sent must record the id");
7747    }
7748
7749    #[test]
7750    fn test_enqueue_rst_into_bumps_total_for_distinct_ids() {
7751        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
7752        let mut total: usize = 0;
7753        let mut sent: HashSet<StreamId> = HashSet::new();
7754        let mut readiness = Readiness::new();
7755
7756        for sid in [1u32, 3, 5, 7] {
7757            enqueue_rst_into(
7758                &mut pending,
7759                &mut total,
7760                &mut sent,
7761                &mut readiness,
7762                sid,
7763                H2Error::ProtocolError,
7764            );
7765        }
7766
7767        assert_eq!(pending.len(), 4);
7768        assert_eq!(total, 4);
7769        assert_eq!(sent.len(), 4);
7770    }
7771
7772    #[test]
7773    fn test_enqueue_rst_into_arms_writable_in_invariant_15_form() {
7774        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
7775        let mut total: usize = 0;
7776        let mut sent: HashSet<StreamId> = HashSet::new();
7777        let mut readiness = Readiness::new();
7778
7779        // Precondition: no WRITABLE bits set.
7780        assert!(!readiness.interest.is_writable());
7781        assert!(!readiness.event.is_writable());
7782
7783        enqueue_rst_into(
7784            &mut pending,
7785            &mut total,
7786            &mut sent,
7787            &mut readiness,
7788            9,
7789            H2Error::FlowControlError,
7790        );
7791
7792        // Postcondition: invariant-15 — both `interest` and `event` WRITABLE
7793        // are raised so the next tick runs `writable()` under edge-triggered
7794        // epoll.
7795        assert!(
7796            readiness.interest.is_writable(),
7797            "arm_writable must raise the interest bit"
7798        );
7799        assert!(
7800            readiness.event.is_writable(),
7801            "arm_writable must raise the event bit (edge-triggered epoll)"
7802        );
7803    }
7804
7805    #[test]
7806    fn test_enqueue_rst_into_dedupe_does_not_rearm_writable() {
7807        // Dedupe is a pure short-circuit: if the stream id is already in
7808        // `rst_sent`, we do not touch the readiness. This matters because
7809        // a re-entrant reset_stream call during a cascading error path
7810        // would otherwise re-raise WRITABLE unnecessarily — harmless but
7811        // noisy in metrics.
7812        let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
7813        let mut total: usize = 0;
7814        let mut sent: HashSet<StreamId> = HashSet::new();
7815        sent.insert(11);
7816        let mut readiness = Readiness::new();
7817
7818        enqueue_rst_into(
7819            &mut pending,
7820            &mut total,
7821            &mut sent,
7822            &mut readiness,
7823            11,
7824            H2Error::ProtocolError,
7825        );
7826
7827        assert!(
7828            pending.is_empty(),
7829            "already-sent ids must not queue a second frame"
7830        );
7831        assert_eq!(total, 0);
7832        assert!(!readiness.interest.is_writable());
7833        assert!(!readiness.event.is_writable());
7834    }
7835
7836    // ── forcefully_terminate_answer arms WRITABLE for ET epoll ───────────
7837    //
7838    // Gap A in the h2spec diagnosis: the pre-fix code set `interest` but
7839    // never raised `event`, so `filter_interest() = event & interest` was
7840    // zero and `writable()` was never scheduled. This test pins the fix.
7841
7842    #[test]
7843    fn test_forcefully_terminate_answer_arms_event_and_interest() {
7844        let pool = make_pool_for_invariant_16();
7845        let ulid = Ulid::generate();
7846        let mut stream = make_stream_for_invariant_16(&pool, ulid);
7847        let mut readiness = Readiness::new();
7848
7849        assert!(!readiness.interest.is_writable());
7850        assert!(!readiness.event.is_writable());
7851
7852        forcefully_terminate_answer(&mut stream, &mut readiness, H2Error::ProtocolError);
7853
7854        assert!(
7855            readiness.interest.is_writable(),
7856            "forcefully_terminate_answer must set the WRITABLE interest bit"
7857        );
7858        assert!(
7859            readiness.event.is_writable(),
7860            "forcefully_terminate_answer must set the WRITABLE event bit — \
7861             without this, filter_interest() = 0 under edge-triggered epoll \
7862             and writable() is never scheduled (h2spec Gap A)"
7863        );
7864    }
7865}
sozu_lib/protocol/mux/h2.rs

sozu_lib/protocol/mux/
h2.rs