sozu_lib/protocol/mux/h2.rs
1//! H2 mux connection wrapper (RFC 9113).
2//!
3//! Owns wire-side connection state: HPACK encoder/decoder, peer settings,
4//! flow window, GOAWAY/RST attribution, and the [`H2FloodDetector`] backing
5//! the CVE-2023-44487 / CVE-2024-27316 / CVE-2025-8671 mitigations. Stream
6//! storage lives in the sibling `Context<L>` (`mux/mod.rs`); this module is
7//! the canonical home for the edge-trigger discipline — paths that queue
8//! bytes for a later event-loop pass must arm writable / signal pending
9//! write (cf. `arm_writable()` at the deferred-control-frame sites and
10//! `lib/src/lib.rs:1006`-`1010`).
11
12use std::{
13 cmp::min,
14 collections::{HashMap, HashSet},
15 io::{IoSlice, Write as _},
16 time::{Duration, Instant},
17};
18
/// Compile-time pointer-width guard.
///
/// The H2 parser performs `payload_len as usize` casts, which are only
/// lossless when `usize` is at least 32 bits wide. Failing the build on
/// narrower targets (e.g. 16-bit embedded platforms) turns a silent
/// truncation into a compile error.
const _: () = assert!(
    usize::BITS >= 32,
    "sozu requires at least 32-bit pointers"
);
26
27use rusty_ulid::Ulid;
28use sozu_command::{logging::ansi_palette, ready::Ready};
29
30use crate::metrics::names;
31use crate::{
32 L7ListenerHandler, ListenerHandler, Protocol, Readiness, SessionMetrics,
33 protocol::mux::{
34 BackendStatus, Context, DebugEvent, DebugHistory, Endpoint, GenericHttpStream,
35 GlobalStreamId, MuxResult, Position, Stream, StreamId, StreamState, converter,
36 forcefully_terminate_answer,
37 parser::{self, Frame, FrameHeader, FrameType, H2Error, Headers, WindowUpdate},
38 pkawa, remove_backend_stream, serializer, set_default_answer,
39 shared::{EndStreamAction, drain_tls_close_notify, end_stream_decision},
40 update_readiness_after_read, update_readiness_after_write,
41 },
42 socket::{SocketHandler, SocketResult, stats::socket_rtt},
43 timer::TimeoutContainer,
44};
45
/// Builds the prefix put in front of every [`ConnectionH2`] log line: the
/// protocol label followed by a session descriptor, in the same shape as the
/// RUSTLS log context (`MUX-H2\tSession(...)\t >>>`).
///
/// The five strings returned by [`ansi_palette`] are used, in order, to style
/// the protocol label, to reset styling, to style the `Session` word, to
/// style each key, and to style each value. Per the palette's contract the
/// label is bold bright-white and the session detail light grey when colored
/// output is enabled.
///
/// Keys rendered inside `Session(...)`, picked to expose the usual H2
/// troubleshooting axes (flow stall, leaked stream, draining state, peer-side
/// gap, reset-flood exposure):
/// - `peer` — result of `peer_addr().ok()`, printed with `{:?}` (so
///   `Some(addr)` or `None` when the address cannot be read)
/// - `position` — `Server` / `Client(...)` orientation
/// - `state` — current [`H2State`]
/// - `streams` — `streams.len()` for this connection
/// - `last_peer_id` — `highest_peer_stream_id`
/// - `window` — connection-level send window (RFC 9113 §6.9)
/// - `draining` — the `drain.draining` flag (graceful-shutdown GOAWAY sent)
/// - `total_rst_streams_emitted_lifetime` — MadeYouReset counter (CVE-2025-8671)
/// - `total_rst_received_lifetime` — Rapid Reset counter (CVE-2023-44487)
/// - `readiness` — connection-level readiness snapshot
///
/// The macro expands in place at each call site and costs one palette lookup
/// plus one `format!` allocation; it is meant to be passed as an argument to
/// the logging macros so the string is only built when the line is emitted.
macro_rules! log_context {
    ($conn:expr) => {{
        // Palette slots named after the role they play in the format string.
        let (label_style, style_reset, session_style, key_style, value_style) = ansi_palette();
        format!(
            "[{ulid} - - -]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = label_style,
            reset = style_reset,
            grey = session_style,
            gray = key_style,
            white = value_style,
            ulid = $conn.session_ulid,
            peer = $conn.socket.socket_ref().peer_addr().ok(),
            position = $conn.position,
            state = $conn.state,
            streams = $conn.streams.len(),
            last_peer_id = $conn.highest_peer_stream_id,
            window = $conn.flow_control.window,
            draining = $conn.drain.draining,
            total_rst_streams_emitted_lifetime =
                $conn.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $conn.flood_detector.total_rst_received_lifetime,
            readiness = $conn.readiness,
        )
    }};
}
93
/// Stream-scoped flavour of [`log_context!`], for call sites that have a
/// [`Stream`]'s [`HttpContext`](crate::protocol::kawa_h1::editor::HttpContext)
/// at hand.
///
/// The leading bracket carries, after the session ULID, the context's `id`
/// (request id), `cluster_id` and `backend_id` — the last two fall back to
/// `-` when unset — so a log line can be filtered down to the H2 stream it
/// belongs to. Everything after the bracket is identical to [`log_context!`].
// Kept defined even when no call site currently expands it.
#[allow(unused_macros)]
macro_rules! log_context_stream {
    ($conn:expr, $ctx:expr) => {{
        // Palette slots named after the role they play in the format string.
        let (label_style, style_reset, session_style, key_style, value_style) = ansi_palette();
        format!(
            "[{ulid} {req} {cluster} {backend}]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = label_style,
            reset = style_reset,
            grey = session_style,
            gray = key_style,
            white = value_style,
            ulid = $conn.session_ulid,
            req = $ctx.id,
            cluster = $ctx.cluster_id.as_deref().unwrap_or("-"),
            backend = $ctx.backend_id.as_deref().unwrap_or("-"),
            peer = $conn.socket.socket_ref().peer_addr().ok(),
            position = $conn.position,
            state = $conn.state,
            streams = $conn.streams.len(),
            last_peer_id = $conn.highest_peer_stream_id,
            window = $conn.flow_control.window,
            draining = $conn.drain.draining,
            total_rst_streams_emitted_lifetime =
                $conn.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $conn.flood_detector.total_rst_received_lifetime,
            readiness = $conn.readiness,
        )
    }};
}
127
/// Session-less log prefix for places where no `ConnectionH2` is available:
/// free functions, `H2ConnectionConfig` validation, and similar sites.
///
/// Produces `MUX-H2\t >>>`, with the label wrapped in the first two strings
/// of [`ansi_palette`] (label style and reset) so it matches the label used
/// by the connection-scoped prefixes and follows the colored-output setting.
macro_rules! log_module_context {
    () => {{
        // Only the label style and the reset sequence are needed here.
        let (label_style, style_reset, ..) = ansi_palette();
        format!(
            "{open}MUX-H2{reset}\t >>>",
            open = label_style,
            reset = style_reset
        )
    }};
}
138
/// One-statement wrapper around the "check for a flood, bail out if one was
/// detected" pattern.
///
/// Expands to: call `flood_detector.check_flood()` on the given connection
/// and, when it yields a violation, `return` the result of
/// `handle_flood_violation(violation)` from the enclosing function. When no
/// violation is reported, execution simply continues.
///
/// The macro is pure dispatch: thresholds and counters stay inside
/// `H2FloodDetector::check_flood` and `ConnectionH2::handle_flood_violation`.
/// Use it at every per-frame counter bump so the early return can never be
/// forgotten at an individual site.
macro_rules! check_flood_or_return {
    ($conn:expr) => {
        if let Some(detected) = $conn.flood_detector.check_flood() {
            return $conn.handle_flood_violation(detected);
        }
    };
}
152
/// Result of flushing one stream's queued bytes inside `write_streams`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlushOutcome {
    /// Every queued byte reached the socket.
    Drained,
    /// The socket blocked while bytes were still queued. The caller has to
    /// arrange for the flush to resume (set `expect_write`, or return from
    /// `write_streams`).
    Stalled,
}
162
163// ── RFC 9113 §6.5.2 Settings Defaults ───────────────────────────────────────
164
165const DEFAULT_HEADER_TABLE_SIZE: u32 = 4096;
166const DEFAULT_MAX_CONCURRENT_STREAMS: u32 = 100;
167pub(super) const DEFAULT_INITIAL_WINDOW_SIZE: u32 = (1 << 16) - 1; // 65535
168const DEFAULT_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
169
170// RFC 9113 §6.5.2: SETTINGS_MAX_FRAME_SIZE valid range [2^14, 2^24)
171const MIN_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
172const MAX_MAX_FRAME_SIZE: u32 = 1 << 24; // 16777216 (exclusive upper bound)
173
174// RFC 9113 §6.9: maximum flow control window size (2^31 - 1)
175const FLOW_CONTROL_MAX_WINDOW: u32 = (1 << 31) - 1;
176// RFC 9113 §5.1.1: stream identifiers are 31-bit unsigned integers (2^31 - 1).
177const STREAM_ID_MAX: u32 = 0x7FFF_FFFF;
178
179/// Allocate the next locally-initiated stream identifier given the current
180/// `last_stream_id` watermark, returning `(issued_id, next_last_stream_id)`
181/// or `None` when the 31-bit space is exhausted.
182///
183/// RFC 9113 §5.1.1 reserves odd identifiers for clients and even identifiers
184/// for servers. Sōzu never server-pushes, so in practice this helper is
185/// called on the backend (client) side via [`ConnectionH2::new_stream_id`].
186/// The server branch is kept symmetrical so the behaviour is exercised by
187/// the unit tests and remains correct if push is ever enabled.
188///
189/// `last_stream_id` tracks the even "watermark" (2, 4, 6, ...). A client call
190/// issues `watermark - 1` (odd), a server call issues `watermark - 2` (even).
191/// The helper enforces two invariants:
192/// - the issued identifier never exceeds `STREAM_ID_MAX` (2³¹ - 1); and
193/// - the returned watermark is a valid starting point for the next call.
194///
195/// Exhaustion is reported with `None` to the caller, which must emit
196/// GOAWAY(NO_ERROR) and stop issuing new streams on this connection
197/// (see `start_stream` for the client-side drain path).
198pub(super) fn next_stream_id(
199 last_stream_id: StreamId,
200 is_client: bool,
201) -> Option<(StreamId, StreamId)> {
202 let next = last_stream_id.checked_add(2)?;
203 let issued = if is_client {
204 next.checked_sub(1)?
205 } else {
206 next.checked_sub(2)?
207 };
208 // RFC 9113 §5.1.1: stream identifiers are 31-bit. Reject any allocation
209 // whose issued value would exceed `STREAM_ID_MAX`; the watermark itself
210 // is allowed to sit at `STREAM_ID_MAX + 1` (the sentinel that fails the
211 // next call).
212 if issued > STREAM_ID_MAX {
213 return None;
214 }
215 // Post-conditions (RFC 9113 §5.1.1):
216 // - the issued id fits the 31-bit space;
217 // - the returned watermark is strictly greater than the id we issued, so a
218 // subsequent call cannot re-issue or regress;
219 // - role-parity: client ids are odd, server ids even. This holds ONLY when
220 // `last_stream_id` is an even watermark, which is the helper's documented
221 // contract and what production always maintains (`create_stream` rounds to
222 // `(stream_id + 2) & !1`; the connection initialises it to 0). The unit
223 // tests deliberately feed odd `last` values at the saturation boundary, so
224 // the parity check is gated on the watermark being even — a parity slip
225 // from an *even* watermark would let two roles collide on one id.
226 debug_assert!(
227 issued <= STREAM_ID_MAX,
228 "issued stream id must fit the 31-bit space"
229 );
230 debug_assert!(
231 next > issued,
232 "the next watermark must advance strictly past the issued id"
233 );
234 debug_assert!(
235 last_stream_id & 1 != 0 || (issued & 1 == 1) == is_client,
236 "from an even watermark, client ids must be odd and server ids even (RFC 9113 §5.1.1)"
237 );
238 Some((issued, next))
239}
240
241/// Enlarged connection-level receive window (1 MB).
242/// The RFC 9113 default is 65 535 bytes, which is too small for high-throughput
243/// proxying and causes excessive WINDOW_UPDATE round-trips. 1 MB matches the
244/// initial window used by HAProxy, the h2 crate, and other production proxies.
245const ENLARGED_CONNECTION_WINDOW: u32 = 1_048_576;
246
247/// H2 client connection preface size: 24-byte magic + 9-byte SETTINGS frame header
248pub(super) const CLIENT_PREFACE_SIZE: usize = 24 + parser::FRAME_HEADER_SIZE;
249
250// ── Flood Detection Thresholds (CVE mitigations) ────────────────────────────
251
252/// Default maximum RST_STREAM frames per window (CVE-2023-44487 Rapid Reset + CVE-2019-9514)
253const DEFAULT_MAX_RST_STREAM_PER_WINDOW: u32 = 100;
254/// Hard lifetime cap on total RST_STREAM frames received on a single
255/// connection (CVE-2023-44487 Rapid Reset).
256///
257/// The per-window counter half-decays, which allows a patient attacker to
258/// sustain ~50 RST/sec indefinitely — each one costs the backend a request
259/// that will be cancelled before any response work is produced. A lifetime
260/// counter that never decays puts an absolute ceiling on that amplification
261/// per connection. 10 000 is generous for legitimate traffic (months of
262/// occasional client-side cancellations) but rapidly trips on the ~30/sec
263/// abusive pace reported in the CVE-2023-44487 advisory (~5 minutes).
264pub(super) const DEFAULT_MAX_RST_STREAM_LIFETIME: u64 = 10_000;
265/// Hard lifetime cap on RST_STREAM frames received BEFORE the corresponding
266/// backend response has started. These are the cheap-for-client /
267/// expensive-for-us resets that characterise Rapid Reset: the client pays
268/// one RST frame, we pay a round-trip to the backend plus request parsing.
269/// A much lower ceiling kills the attack well before 10 000 lifetime total.
270pub(super) const DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME: u64 = 50;
271/// Absolute lifetime cap on **server-emitted** RST_STREAM frames on a single
272/// connection (CVE-2025-8671 — "MadeYouReset"). Distinct from
273/// [`DEFAULT_MAX_RST_STREAM_LIFETIME`] which caps *received* RSTs
274/// (CVE-2023-44487 Rapid Reset).
275///
276/// MadeYouReset has the server talk itself into flooding: the attacker sends
277/// legitimate-looking frames that force the server to emit RST_STREAM (content
278/// -length mismatch, header parse error, rejected priority, zero-increment
279/// `WINDOW_UPDATE` on an open stream, …). Each forced RST costs the server a
280/// header-decode, kawa buffer setup and frame serialisation; uncapped, it
281/// becomes the same class of DoS as Rapid Reset but with a flipped emission
282/// direction.
283///
284/// 500 is conservative: legitimate traffic very rarely triggers a
285/// server-initiated RST (aside from graceful `NoError` cancels which are not
286/// counted), so crossing 500 on a single connection is a strong abuse signal.
287pub(super) const DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME: u64 = 500;
/// Default per-window ceiling on received PING frames
/// (CVE-2019-9512 Ping Flood).
const DEFAULT_MAX_PING_PER_WINDOW: u32 = 100;
/// Default lifetime ceiling on PING frames received over one connection.
/// Same value as `DEFAULT_MAX_RST_STREAM_LIFETIME` (stored here as `u32`):
/// roomy for legitimate keep-alives, reached by sustained low-rate abuse
/// (CVE-2019-9512).
const DEFAULT_MAX_PING_LIFETIME: u32 = 10_000;

/// Default per-window ceiling on received SETTINGS frames
/// (CVE-2019-9515 Settings Flood).
const DEFAULT_MAX_SETTINGS_PER_WINDOW: u32 = 50;
/// Default lifetime ceiling on SETTINGS frames received over one connection.
/// Same value as `DEFAULT_MAX_RST_STREAM_LIFETIME` (stored here as `u32`):
/// roomy for legitimate renegotiations, reached by sustained low-rate abuse
/// (CVE-2019-9515).
const DEFAULT_MAX_SETTINGS_LIFETIME: u32 = 10_000;

/// Default per-window ceiling on empty DATA frames
/// (CVE-2019-9518 Empty Frames).
const DEFAULT_MAX_EMPTY_DATA_PER_WINDOW: u32 = 100;

/// Default per-window ceiling on connection-level (stream 0) WINDOW_UPDATE
/// frames.
///
/// Non-zero stream-0 WINDOW_UPDATEs are individually legal, so the generic
/// glitch detector does not count them; without this cap a peer could burn
/// proxy CPU with millions of them. The value matches
/// [`DEFAULT_MAX_EMPTY_DATA_PER_WINDOW`] and [`DEFAULT_MAX_PING_PER_WINDOW`];
/// a legitimate peer needs only a handful per second.
const DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW: u32 = 100;

/// Default ceiling on CONTINUATION frames within one header block
/// (CVE-2024-27316).
const DEFAULT_MAX_CONTINUATION_FRAMES: u32 = 20;
311/// Maximum accumulated header block size across CONTINUATION frames (64KB)
312pub(super) const MAX_HEADER_LIST_SIZE: usize = 65536;
313/// Default maximum HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE)
314/// accepted from the peer. 64 KB is well above the RFC default of 4 KB
315/// while preventing a malicious peer from advertising up to 4 GB.
316const DEFAULT_MAX_HEADER_TABLE_SIZE: u32 = 65536;
317/// Default maximum number of materialized header fields per request/response —
318/// HPACK fields plus expanded cookie crumbs (RFC 9113 §8.2.3). Bounds the HPACK
319/// indexed-reference "header bomb": each 1-byte indexed reference materializes a
320/// `Pair` of per-entry bookkeeping, so an attacker amplifies wire bytes into
321/// allocation. RFC 9113 §6.5.2's +32-octet/field accounting alone caps this at
322/// ~2048 fields for a 64 KB list; this explicit count cap is the tighter,
323/// upstream-matching defense (cf. nginx `max_headers`, Apache `LimitRequestFields`).
324const DEFAULT_MAX_HEADER_FIELDS: u32 = 128;
325/// Cumulative outbound progress (bytes) a window-stalled stream must drain to
326/// clear its flow-control-stall deadline (M2 cumulative-stall budget). Below
327/// this, a `WINDOW_UPDATE(+1)` drip that trickles a few bytes per idle period
328/// cannot keep the slot alive: the deadline ages out and the reaper
329/// RST(CANCEL)s the stream. Chosen as one max H2 DATA frame payload (16 KiB) —
330/// a legitimate slow-but-steady transfer drains at least one frame per idle
331/// period at any realistic bandwidth, while a drip attacker grants far less. A
332/// `const`, not a config knob: `h2_stream_idle_timeout_seconds` is already the
333/// operator dial for slow-link tolerance, and coupling a second knob invites
334/// misconfiguration (high floor + low deadline = mass false reaps).
335const FC_STALL_CLEAR_FLOOR: usize = 16 * 1024;
336/// RFC 9113 §6.5.2: the size accounted against `SETTINGS_MAX_HEADER_LIST_SIZE`
337/// is the uncompressed name + value octets PLUS a 32-octet overhead per field.
338/// The per-field overhead is what bounds the field count under a fixed byte
339/// budget — omitting it lets a peer materialize ~33× more fields than intended.
340pub(super) const HEADER_FIELD_SIZE_OVERHEAD: usize = 32;
/// Length of the sliding window used by the rate-based flood counters.
const FLOOD_WINDOW_DURATION: Duration = Duration::from_secs(1);
/// Default number of general protocol anomalies tolerated before the
/// connection is answered with ENHANCE_YOUR_CALM.
const DEFAULT_MAX_GLITCH_COUNT: u32 = 100;

/// RFC 9113 §5.1.2: number of `REFUSED_STREAM` emissions within one
/// [`BACKPRESSURE_WINDOW_DURATION`] that switches on back-pressure. From that
/// point the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` is halved, so the
/// peer lowers its request rate instead of paying an RST round-trip for every
/// new stream.
const BACKPRESSURE_REFUSAL_THRESHOLD: u32 = 50;
/// Length of the sliding window over which refusal bursts are counted for
/// SETTINGS back-pressure.
const BACKPRESSURE_WINDOW_DURATION: Duration = Duration::from_secs(60);
354
/// Tunable thresholds for H2 flood detection.
///
/// [`Default`] fills every field from the compile-time constants of this
/// module. When the values come from listener configuration, entries left
/// unset (`None`) are expected to fall back to those same defaults.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2FloodConfig {
    /// RST_STREAM frames tolerated per one-second window
    /// (CVE-2023-44487, CVE-2019-9514).
    pub max_rst_stream_per_window: u32,
    /// PING frames tolerated per one-second window (CVE-2019-9512).
    pub max_ping_per_window: u32,
    /// SETTINGS frames tolerated per one-second window (CVE-2019-9515).
    pub max_settings_per_window: u32,
    /// Empty DATA frames tolerated per one-second window (CVE-2019-9518).
    pub max_empty_data_per_window: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames tolerated per sliding
    /// window. Each such frame is legal on its own, so the generic glitch
    /// counter never trips; this cap bounds the CPU a peer can burn by
    /// sending them in bulk.
    pub max_window_update_stream0_per_window: u32,
    /// CONTINUATION frames tolerated within one header block (CVE-2024-27316).
    pub max_continuation_frames: u32,
    /// Accumulated protocol anomalies tolerated before ENHANCE_YOUR_CALM.
    pub max_glitch_count: u32,
    /// Lifetime cap on RST_STREAM frames received over one connection
    /// (CVE-2023-44487). It never decays, giving a ceiling the per-window
    /// counter cannot.
    pub max_rst_stream_lifetime: u64,
    /// Lifetime cap on "abusive" RST_STREAM frames, i.e. those received
    /// before the response has started — the Rapid Reset signature
    /// (CVE-2023-44487).
    pub max_rst_stream_abusive_lifetime: u64,
    /// Lifetime cap on **server-emitted** RST_STREAM frames for the
    /// connection (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
    /// count; graceful cancels are exempt.
    pub max_rst_stream_emitted_lifetime: u64,
    /// Cap on the accumulated HPACK-decoded header list size per request
    /// (SETTINGS_MAX_HEADER_LIST_SIZE, RFC 9113 §6.5.2).
    pub max_header_list_size: u32,
    /// Cap on the HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE)
    /// accepted from the peer, preventing unbounded HPACK encoder memory
    /// growth driven by the value the peer advertises.
    pub max_header_table_size: u32,
    /// Cap on materialized header fields — HPACK fields plus expanded cookie
    /// crumbs (RFC 9113 §8.2.3) — enforced per HEADERS block and,
    /// independently, per trailers block. Bounds the HPACK indexed-reference
    /// header bomb, where each 1-byte reference materializes a `Pair` of
    /// per-entry bookkeeping.
    pub max_header_fields: u32,
}
404
405impl Default for H2FloodConfig {
406 fn default() -> Self {
407 Self {
408 max_rst_stream_per_window: DEFAULT_MAX_RST_STREAM_PER_WINDOW,
409 max_ping_per_window: DEFAULT_MAX_PING_PER_WINDOW,
410 max_settings_per_window: DEFAULT_MAX_SETTINGS_PER_WINDOW,
411 max_empty_data_per_window: DEFAULT_MAX_EMPTY_DATA_PER_WINDOW,
412 max_window_update_stream0_per_window: DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW,
413 max_continuation_frames: DEFAULT_MAX_CONTINUATION_FRAMES,
414 max_glitch_count: DEFAULT_MAX_GLITCH_COUNT,
415 max_rst_stream_lifetime: DEFAULT_MAX_RST_STREAM_LIFETIME,
416 max_rst_stream_abusive_lifetime: DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME,
417 max_rst_stream_emitted_lifetime: DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME,
418 max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
419 max_header_table_size: DEFAULT_MAX_HEADER_TABLE_SIZE,
420 max_header_fields: DEFAULT_MAX_HEADER_FIELDS,
421 }
422 }
423}
424
425impl H2FloodConfig {
426 /// Create a validated config, clamping all thresholds to at least 1.
427 /// Zero thresholds would cause immediate flood detection on any frame.
428 #[allow(clippy::too_many_arguments)]
429 pub fn new(
430 max_rst_stream_per_window: u32,
431 max_ping_per_window: u32,
432 max_settings_per_window: u32,
433 max_empty_data_per_window: u32,
434 max_window_update_stream0_per_window: u32,
435 max_continuation_frames: u32,
436 max_glitch_count: u32,
437 max_rst_stream_lifetime: u64,
438 max_rst_stream_abusive_lifetime: u64,
439 max_rst_stream_emitted_lifetime: u64,
440 max_header_list_size: u32,
441 max_header_table_size: u32,
442 max_header_fields: u32,
443 ) -> Self {
444 let config = Self {
445 max_rst_stream_per_window: max_rst_stream_per_window.max(1),
446 max_ping_per_window: max_ping_per_window.max(1),
447 max_settings_per_window: max_settings_per_window.max(1),
448 max_empty_data_per_window: max_empty_data_per_window.max(1),
449 max_window_update_stream0_per_window: max_window_update_stream0_per_window.max(1),
450 max_continuation_frames: max_continuation_frames.max(1),
451 max_glitch_count: max_glitch_count.max(1),
452 max_rst_stream_lifetime: max_rst_stream_lifetime.max(1),
453 max_rst_stream_abusive_lifetime: max_rst_stream_abusive_lifetime.max(1),
454 max_rst_stream_emitted_lifetime: max_rst_stream_emitted_lifetime.max(1),
455 max_header_list_size: max_header_list_size.max(1),
456 max_header_table_size: max_header_table_size.max(1),
457 max_header_fields: max_header_fields.max(1),
458 };
459 // Post-condition: every threshold is clamped to at least 1. A zero
460 // threshold would make `check_flood`/`record_rst_*` trip on the very
461 // first frame (count > 0 > threshold), turning a legitimate connection
462 // into an immediate GOAWAY. This is the central invariant the clamps
463 // above exist to enforce — assert it rather than trusting the `.max(1)`
464 // chain stays correct under future edits.
465 debug_assert!(
466 config.max_rst_stream_per_window >= 1
467 && config.max_ping_per_window >= 1
468 && config.max_settings_per_window >= 1
469 && config.max_empty_data_per_window >= 1
470 && config.max_window_update_stream0_per_window >= 1
471 && config.max_continuation_frames >= 1
472 && config.max_glitch_count >= 1,
473 "every u32 flood threshold must be clamped to >= 1"
474 );
475 debug_assert!(
476 config.max_rst_stream_lifetime >= 1
477 && config.max_rst_stream_abusive_lifetime >= 1
478 && config.max_rst_stream_emitted_lifetime >= 1
479 && config.max_header_list_size >= 1
480 && config.max_header_table_size >= 1
481 && config.max_header_fields >= 1,
482 "every lifetime/size flood threshold must be clamped to >= 1"
483 );
484 config
485 }
486}
487
/// Default shrink ratio for the stream `Vec`: shrinking is considered once
/// the total slot count exceeds `active * ratio`.
const DEFAULT_STREAM_SHRINK_RATIO: u32 = 2;
490
/// Tunable parameters of an H2 connection.
///
/// [`Default`] uses the compile-time defaults of this module; values missing
/// from listener configuration fall back to them as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2ConnectionConfig {
    /// Size in bytes of the connection-level receive window
    /// (RFC 9113 §6.9.2).
    pub initial_connection_window: u32,
    /// Value advertised as SETTINGS_MAX_CONCURRENT_STREAMS.
    pub max_concurrent_streams: u32,
    /// Ratio that decides when recycled stream slots are shrunk away.
    pub stream_shrink_ratio: u32,
}
504
505impl Default for H2ConnectionConfig {
506 fn default() -> Self {
507 Self {
508 initial_connection_window: ENLARGED_CONNECTION_WINDOW,
509 max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
510 stream_shrink_ratio: DEFAULT_STREAM_SHRINK_RATIO,
511 }
512 }
513}
514
515impl H2ConnectionConfig {
516 /// Create a validated config, clamping to safe bounds.
517 ///
518 /// - `initial_connection_window`: clamped to \[65535, 2^31-1\] per RFC 9113 §6.9
519 /// - `max_concurrent_streams`: minimum 1
520 /// - `stream_shrink_ratio`: minimum 2 (1 would defeat slot recycling)
521 pub fn new(
522 initial_connection_window: u32,
523 max_concurrent_streams: u32,
524 stream_shrink_ratio: u32,
525 ) -> Self {
526 let clamped_window =
527 initial_connection_window.clamp(DEFAULT_INITIAL_WINDOW_SIZE, FLOW_CONTROL_MAX_WINDOW);
528 if clamped_window != initial_connection_window {
529 warn!(
530 "{} h2_initial_connection_window {} clamped to [{}, {}]",
531 log_module_context!(),
532 initial_connection_window,
533 DEFAULT_INITIAL_WINDOW_SIZE,
534 FLOW_CONTROL_MAX_WINDOW
535 );
536 }
537 const MAX_SAFE_CONCURRENT_STREAMS: u32 = 10_000;
538 let clamped_streams = max_concurrent_streams.clamp(1, MAX_SAFE_CONCURRENT_STREAMS);
539 if max_concurrent_streams > MAX_SAFE_CONCURRENT_STREAMS {
540 error!(
541 "{} h2_max_concurrent_streams={} exceeds safe limit, clamped to {}",
542 log_module_context!(),
543 max_concurrent_streams,
544 MAX_SAFE_CONCURRENT_STREAMS
545 );
546 }
547 if clamped_streams != max_concurrent_streams
548 && max_concurrent_streams <= MAX_SAFE_CONCURRENT_STREAMS
549 {
550 warn!(
551 "{} h2_max_concurrent_streams {} clamped to minimum 1",
552 log_module_context!(),
553 max_concurrent_streams
554 );
555 }
556 let clamped_ratio = stream_shrink_ratio.max(2);
557 if clamped_ratio != stream_shrink_ratio {
558 warn!(
559 "{} h2_stream_shrink_ratio {} clamped to minimum 2",
560 log_module_context!(),
561 stream_shrink_ratio
562 );
563 }
564 let config = Self {
565 initial_connection_window: clamped_window,
566 max_concurrent_streams: clamped_streams,
567 stream_shrink_ratio: clamped_ratio,
568 };
569 // Post-conditions matching the documented clamp ranges. The window must
570 // stay within RFC 9113 §6.9's [65535, 2^31-1] (a window outside this
571 // band desynchronises flow control with the peer); max_concurrent_streams
572 // must be >= 1 (zero would refuse every stream); shrink_ratio must be
573 // >= 2 (1 defeats slot recycling, the whole point of the knob).
574 debug_assert!(
575 (DEFAULT_INITIAL_WINDOW_SIZE..=FLOW_CONTROL_MAX_WINDOW)
576 .contains(&config.initial_connection_window),
577 "clamped connection window must lie within RFC 9113 §6.9 bounds"
578 );
579 debug_assert!(
580 config.max_concurrent_streams >= 1,
581 "clamped max_concurrent_streams must be >= 1"
582 );
583 debug_assert!(
584 config.stream_shrink_ratio >= 2,
585 "clamped stream_shrink_ratio must be >= 2 to keep slot recycling effective"
586 );
587 config
588 }
589
590 /// Create from optional config values, falling back to compile-time defaults.
591 /// Combines unwrap-or-default with validation clamping.
592 pub fn from_optional(
593 window: Option<u32>,
594 max_streams: Option<u32>,
595 shrink_ratio: Option<u32>,
596 ) -> Self {
597 let defaults = Self::default();
598 Self::new(
599 window.unwrap_or(defaults.initial_connection_window),
600 max_streams.unwrap_or(defaults.max_concurrent_streams),
601 shrink_ratio.unwrap_or(defaults.stream_shrink_ratio),
602 )
603 }
604}
605
/// Pending WINDOW_UPDATE capacity at the default stream limit: four entries
/// per stream plus one. Test-only — the per-connection cap used at runtime is
/// derived from `connection_config.max_concurrent_streams`.
#[cfg(test)]
const DEFAULT_MAX_PENDING_WINDOW_UPDATES: usize = 4 * DEFAULT_MAX_CONCURRENT_STREAMS as usize + 1;
610
/// Number of RST_STREAM frames allowed to sit in the pending queue before the
/// connection is torn down with GOAWAY.
///
/// A peer can force heavy RST_STREAM queueing (for instance by opening
/// streams rapidly beyond MAX_CONCURRENT_STREAMS); this cap keeps memory
/// bounded and results in an ENHANCE_YOUR_CALM connection error.
const MAX_PENDING_RST_STREAMS: usize = 200;

/// RFC 9113 §6.5: how long to wait for a SETTINGS ACK before sending GOAWAY
/// with the SETTINGS_TIMEOUT error code (5 seconds).
const SETTINGS_ACK_TIMEOUT: Duration = Duration::from_secs(5);
620
621#[inline(always)]
622fn error_nom_to_h2(error: nom::Err<parser::ParserError>) -> H2Error {
623 match error {
624 nom::Err::Error(parser::ParserError {
625 kind: parser::ParserErrorKind::H2(e),
626 ..
627 }) => e,
628 nom::Err::Failure(parser::ParserError {
629 kind: parser::ParserErrorKind::H2(e),
630 ..
631 }) => e,
632 _ => H2Error::ProtocolError,
633 }
634}
635
636/// Distribute connection-level byte overhead proportionally to a single stream.
637///
638/// Overhead is distributed in proportion to the bytes this stream transferred
639/// relative to the total across all active streams. A stream that transferred
640/// 60% of total bytes gets 60% of the overhead.
641///
642/// `stream_bytes` and `total_bytes` are `(bytes_in, bytes_out)` tuples.
643/// Falls back to even distribution (1/active_streams) when no stream has
644/// transferred any bytes yet (total is zero).
645///
646/// Extracted as a free function to avoid borrow conflicts when `self` fields
647/// (e.g. `encoder`) are borrowed by the converter while we need to update
648/// per-stream metrics and connection overhead counters.
649fn distribute_overhead(
650 metrics: &mut SessionMetrics,
651 overhead_bin: &mut usize,
652 overhead_bout: &mut usize,
653 stream_bytes: (usize, usize),
654 total_bytes: (usize, usize),
655 active_streams: usize,
656 is_last_stream: bool,
657) {
658 let share_in = if is_last_stream {
659 // Last stream gets all remaining overhead to avoid losing remainder bytes
660 // from integer division across earlier streams.
661 *overhead_bin
662 } else if total_bytes.0 > 0 {
663 // Clamp to remaining overhead — integer division rounding across multiple
664 // streams can cause accumulated shares to exceed the total.
665 (*overhead_bin * stream_bytes.0 / total_bytes.0).min(*overhead_bin)
666 } else {
667 // No stream has transferred any inbound bytes — fall back to even split.
668 *overhead_bin / active_streams.max(1)
669 };
670 let share_out = if is_last_stream {
671 *overhead_bout
672 } else if total_bytes.1 > 0 {
673 (*overhead_bout * stream_bytes.1 / total_bytes.1).min(*overhead_bout)
674 } else {
675 // No stream has transferred any outbound bytes — fall back to even split.
676 *overhead_bout / active_streams.max(1)
677 };
678 // Pre-condition: a stream can never be credited more overhead than remains
679 // in the pool — otherwise the `*overhead_b* -= share_*` below underflows
680 // (usize wraps to a huge value, corrupting connection-overhead accounting).
681 // Every branch above either takes the whole pool (last stream) or `.min`s
682 // against it, so this must hold.
683 debug_assert!(
684 share_in <= *overhead_bin,
685 "overhead-in share must not exceed the remaining overhead pool"
686 );
687 debug_assert!(
688 share_out <= *overhead_bout,
689 "overhead-out share must not exceed the remaining overhead pool"
690 );
691 let before_bin = *overhead_bin;
692 let before_bout = *overhead_bout;
693 metrics.bin += share_in;
694 metrics.bout += share_out;
695 *overhead_bin -= share_in;
696 *overhead_bout -= share_out;
697 // Post-condition: the pool shrinks by exactly the credited share (overhead
698 // is conserved, neither created nor lost). The last stream drains it to 0.
699 debug_assert_eq!(
700 *overhead_bin,
701 before_bin - share_in,
702 "overhead-in pool must decrease by exactly the credited share"
703 );
704 debug_assert_eq!(
705 *overhead_bout,
706 before_bout - share_out,
707 "overhead-out pool must decrease by exactly the credited share"
708 );
709 debug_assert!(
710 !is_last_stream || (*overhead_bin == 0 && *overhead_bout == 0),
711 "the last stream must drain the overhead pool to zero (no lost remainder)"
712 );
713}
714
715/// LIFECYCLE §9 invariant 16 probe: returns `true` if any open stream still
716/// has outbound kawa bytes queued (`back.out` non-empty or `back.blocks`
717/// non-drained).
718///
719/// Used by `finalize_write` to preserve `Ready::WRITABLE` across a voluntary
720/// scheduler yield, and by `has_pending_write_full` to block shutdown-drain
721/// while bytes are still owed to the frontend.
722///
723/// `.get()` rather than direct indexing: an unknown `GlobalStreamId` is
724/// treated as "no pending bytes" rather than panicking — defence-in-depth
725/// against a stream-removal race during shutdown.
726fn any_stream_has_pending_back(
727 streams: &HashMap<StreamId, GlobalStreamId>,
728 context_streams: &[Stream],
729) -> bool {
730 any_stream_id_matches(streams, |gid| {
731 context_streams
732 .get(gid)
733 .is_some_and(|s| !s.back.out.is_empty() || !s.back.blocks.is_empty())
734 })
735}
736
737/// Iteration core of [`any_stream_has_pending_back`], split out so the
738/// invariant-16 dispatch is unit-testable without a full [`Stream`] fixture
739/// (the existing test module only covers `H2FloodDetector`).
740fn any_stream_id_matches<F>(streams: &HashMap<StreamId, GlobalStreamId>, mut probe: F) -> bool
741where
742 F: FnMut(GlobalStreamId) -> bool,
743{
744 streams.values().any(|gid| probe(*gid))
745}
746
747/// Collect the live streams that have exceeded `deadline` under either
748/// per-stream reap guard, deduped so a stream tripping both is reaped (and
749/// access-logged) exactly once. Split out from
750/// [`ConnectionH2::cancel_timed_out_streams`] so the two-guard union is
751/// unit-testable without a full `ConnectionH2` fixture (the existing test
752/// module only fixtures `H2FloodDetector` and `Stream`):
753///
754/// - `last_activity` — bidirectional-silence guard: no DATA/HEADERS in either
755/// direction (the slow-multiplex Slowloris timer).
756/// - `fc_stalled` — outbound-flow-control-starvation guard: a buffered response
757/// that cannot drain because the peer keeps its receive window shut (the
758/// HTTP/2 window-stall / WINDOW_UPDATE-drip vector). This guard is what the
759/// liveness timer misses: an inbound 1-byte DATA drip keeps `last_activity`
760/// warm, but never touches `fc_stalled`.
761///
762/// Streams not in `live_streams` or already in `rst_sent` are skipped. The
763/// returned reason string is the access-log tag for the guard that tripped
764/// first (idle takes precedence on a tie, purely for a stable label).
765fn collect_timed_out_streams(
766 last_activity: &HashMap<StreamId, Instant>,
767 fc_stalled: &HashMap<StreamId, Instant>,
768 live_streams: &HashMap<StreamId, GlobalStreamId>,
769 rst_sent: &HashSet<StreamId>,
770 now: Instant,
771 deadline: std::time::Duration,
772) -> Vec<(StreamId, &'static str)> {
773 let eligible = |sid: StreamId| live_streams.contains_key(&sid) && !rst_sent.contains(&sid);
774 let expired = |t: Instant| now.saturating_duration_since(t) > deadline;
775 let mut seen: HashSet<StreamId> = HashSet::new();
776 let mut out: Vec<(StreamId, &'static str)> = Vec::new();
777 for (&sid, &t) in last_activity {
778 if eligible(sid) && expired(t) && seen.insert(sid) {
779 out.push((sid, "H2::IdleTimeout"));
780 }
781 }
782 for (&sid, &t) in fc_stalled {
783 if eligible(sid) && expired(t) && seen.insert(sid) {
784 out.push((sid, "H2::WindowStall"));
785 }
786 }
787 out
788}
789
790/// True when a stream still has response/upload bytes that could be put on the
791/// wire — headers/body in flight, or a terminated-but-not-fully-flushed buffer.
792/// Deliberately EXCLUDES `is_error()`/`rst_sent`: that disjunct is specific to
793/// the priority-eligibility and write-loop gates (`write_streams`) and must stay
794/// inline there; this 2-clause helper backs ONLY the window-stall arm.
795fn has_sendable_response(kawa: &GenericHttpStream) -> bool {
796 kawa.is_main_phase() || (kawa.is_terminated() && !kawa.is_completed())
797}
798
/// Outcome of the M2 cumulative-stall budget decision for one `write_streams`
/// pass on a window-stalled stream, as returned by
/// [`fc_stall_budget_decision`].
///
/// Extracted from the `write_streams` arm so the budget logic is
/// unit-testable without a full `ConnectionH2` fixture (mirrors the
/// [`collect_timed_out_streams`] extraction). The enum only describes the
/// action; applying it to the per-stream bookkeeping is the caller's job.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FcStallAction {
    /// Clear both the deadline (`stream_fc_stalled_since`) and the progress
    /// accumulator (`stream_fc_stalled_progress`) for this stream.
    Clear,
    /// Ensure the deadline is armed (WITHOUT refreshing an existing `Instant`)
    /// and set the progress accumulator to `progress`.
    ///
    /// `progress` is the cumulative drained total computed by
    /// [`fc_stall_budget_decision`] (prior progress plus this pass).
    Arm { progress: usize },
}
812
813/// Decide what to do with a stream's flow-control-stall deadline + cumulative
814/// progress accumulator on one write pass (M2 cumulative-stall budget).
815///
816/// - A genuinely open send window (`!outbound_window_blocked`) is a real
817/// un-stall → [`FcStallAction::Clear`].
818/// - While the window stays blocked, accumulate this pass's outbound drain
819/// (`consumed`, clamped to `>= 0`) onto `prior_progress`. Once the cumulative
820/// total reaches [`FC_STALL_CLEAR_FLOOR`] (a full DATA frame of real delivery)
821/// → `Clear`; otherwise `Arm` with the running total. A `WINDOW_UPDATE(+1)`
822/// drip adds ~1 byte/pass and never reaches the floor, so the deadline keeps
823/// aging and the reaper eventually fires.
824fn fc_stall_budget_decision(
825 outbound_window_blocked: bool,
826 consumed: i32,
827 prior_progress: Option<usize>,
828) -> FcStallAction {
829 if !outbound_window_blocked {
830 return FcStallAction::Clear;
831 }
832 let progressed = prior_progress
833 .unwrap_or(0)
834 .saturating_add(consumed.max(0) as usize);
835 if progressed >= FC_STALL_CLEAR_FLOOR {
836 FcStallAction::Clear
837 } else {
838 FcStallAction::Arm {
839 progress: progressed,
840 }
841 }
842}
843
844/// Core of [`ConnectionH2::enqueue_rst`], extracted so the RST-queueing
845/// semantics (dedupe, queued-cap counter bump, invariant-15 readiness rearm)
846/// can be unit-tested without building a full `ConnectionH2<Front>` fixture.
847///
848/// Invariants enforced:
849/// - **Dedupe** via `rst_sent`: at most one queued RST per wire stream id.
850/// `HashSet::insert` returns `false` when the id is already present; we
851/// short-circuit on that branch to keep `pending_rst_streams`,
852/// `total_rst_streams_queued` and the wire counts consistent.
853/// - **MadeYouReset queued cap** (`MAX_PENDING_RST_STREAMS`): each freshly
854/// queued RST bumps `total_rst_streams_queued`, which
855/// `flush_pending_control_frames` polices to escalate to
856/// `GOAWAY(ENHANCE_YOUR_CALM)` when exceeded.
857/// - **Invariant 15** (edge-triggered epoll): pair `Ready::WRITABLE` interest
858/// with the event bit so `writable()` is scheduled on the next tick.
859///
860/// Returns `true` when the RST was freshly queued, `false` when the
861/// stream was already in `rst_sent` (the caller asked to RST the same
862/// stream twice — a benign re-entrant idempotency, NOT a new wire
863/// emission). The boolean lets [`ConnectionH2::enqueue_rst`] account
864/// the RST only on the freshly-queued path so duplicate calls do not
865/// inflate the per-error counter or trip the MadeYouReset flood cap
866/// for frames that never reach the wire.
867fn enqueue_rst_into(
868 pending: &mut Vec<(StreamId, H2Error)>,
869 total: &mut usize,
870 rst_sent: &mut HashSet<StreamId>,
871 readiness: &mut Readiness,
872 wire_stream_id: StreamId,
873 error: H2Error,
874) -> bool {
875 let pending_before = pending.len();
876 let total_before = *total;
877 if !rst_sent.insert(wire_stream_id) {
878 // Dedupe short-circuit: the id was already queued/flushed. We must NOT
879 // touch any of the wire-count state, otherwise duplicate calls inflate
880 // the MadeYouReset (CVE-2025-8671) lifetime cap with frames that never
881 // reach the wire.
882 debug_assert!(
883 rst_sent.contains(&wire_stream_id),
884 "dedupe path requires the id to already be present in rst_sent"
885 );
886 debug_assert_eq!(
887 pending.len(),
888 pending_before,
889 "dedupe path must not enqueue a new pending RST"
890 );
891 debug_assert_eq!(
892 *total, total_before,
893 "dedupe path must not bump the queued-RST lifetime counter"
894 );
895 return false;
896 }
897 pending.push((wire_stream_id, error));
898 *total += 1;
899 readiness.arm_writable();
900 // Post-condition: a freshly-queued RST advances both the pending Vec and the
901 // lifetime counter by exactly one, and the id is now tracked for dedupe.
902 debug_assert!(
903 rst_sent.contains(&wire_stream_id),
904 "freshly-queued RST must be recorded in rst_sent for future dedupe"
905 );
906 debug_assert_eq!(
907 pending.len(),
908 pending_before + 1,
909 "a freshly-queued RST must push exactly one pending entry"
910 );
911 debug_assert_eq!(
912 *total,
913 total_before + 1,
914 "a freshly-queued RST must bump the queued-RST lifetime counter by one"
915 );
916 debug_assert_eq!(
917 pending.last().map(|(id, _)| *id),
918 Some(wire_stream_id),
919 "the just-pushed entry must be the requested wire stream id"
920 );
921 true
922}
923
/// Detail of a flood-threshold violation returned by
/// [`H2FloodDetector::check_flood`], [`H2FloodDetector::record_rst_lifetime`]
/// and [`H2FloodDetector::record_rst_emitted`].
///
/// Carrying `(reason, count, threshold)` lets the caller emit a session-scoped
/// log line with full context — the detector itself is connection-agnostic and
/// never logs.
#[derive(Debug, Clone, PartialEq)]
pub struct H2FloodViolation {
    /// HTTP/2 error code to emit on the GOAWAY. Every violation built by
    /// [`H2FloodDetector`] uses `H2Error::EnhanceYourCalm`.
    pub error: H2Error,
    /// Human-readable name of the counter that tripped (e.g. `"RST_STREAM"`).
    pub reason: &'static str,
    /// Statsd metric key emitted by [`ConnectionH2::handle_flood_violation`].
    /// Carried alongside `reason` so a single field maps to both the log line
    /// and the dashboard counter — adding a new violation kind requires
    /// choosing both at the construction site, preventing drift.
    pub metric_key: &'static str,
    /// Observed counter value at the moment of detection (`u32` counters are
    /// widened to `u64` so windowed and lifetime counters share one field).
    pub count: u64,
    /// Configured ceiling that was crossed; `count` is strictly greater.
    pub threshold: u64,
}
946
/// Tracks per-connection frame rates to detect and mitigate H2 flood attacks.
///
/// Monitors RST_STREAM (CVE-2023-44487), PING (CVE-2019-9512), SETTINGS (CVE-2019-9515),
/// empty DATA (CVE-2019-9518), and CONTINUATION (CVE-2024-27316) flood patterns.
/// When any counter exceeds its threshold, `check_flood()` returns the violation
/// detail so callers can log with connection context before sending GOAWAY.
///
/// Three families of counters live here:
/// - windowed counters, halved by [`Self::maybe_reset_window`] once the
///   current window has expired (`rst_stream_count`, `ping_count`,
///   `settings_count`, `empty_data_count`, `window_update_stream0_count`,
///   `glitch_count`);
/// - per-header-block accumulators, zeroed by [`Self::reset_continuation`]
///   (`continuation_count`, `accumulated_header_size`);
/// - lifetime counters (`total_*_lifetime`), which never decay.
///
/// Thresholds are configurable via [`H2FloodConfig`], with safe defaults matching
/// the original compile-time constants.
#[derive(Debug)]
pub struct H2FloodDetector {
    /// RST_STREAM frames received in current window (CVE-2023-44487 + CVE-2019-9514)
    pub(super) rst_stream_count: u32,
    /// Lifetime RST_STREAM frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing a sustained ~50 RST/sec burst
    /// from running forever.
    pub(super) total_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames received that targeted a stream whose
    /// backend response had not yet started. These are the "Rapid Reset"
    /// signature — cheap for the attacker, expensive for the proxy — and
    /// trip on a much lower ceiling than the generic lifetime counter.
    pub(super) total_abusive_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames **emitted by the server** on this
    /// connection (CVE-2025-8671 "MadeYouReset" mitigation). Incremented
    /// inside [`ConnectionH2::reset_stream`] whenever a non-`NoError` reset
    /// is triggered by an attacker-crafted frame (content-length mismatch,
    /// header parse error, priority rejection, zero-increment WINDOW_UPDATE
    /// on an open stream). Never decays — provides an absolute ceiling that
    /// short-circuits patient-attacker patterns that stay under any windowed
    /// counter.
    pub(super) total_rst_streams_emitted_lifetime: u64,
    /// PING frames received in current window (CVE-2019-9512)
    pub(super) ping_count: u32,
    /// Lifetime PING frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate PING abuse.
    pub(super) total_ping_received_lifetime: u32,
    /// SETTINGS frames received in current window (CVE-2019-9515)
    pub(super) settings_count: u32,
    /// Lifetime SETTINGS frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate SETTINGS abuse.
    pub(super) total_settings_received_lifetime: u32,
    /// Empty DATA frames received in current window (CVE-2019-9518)
    pub(super) empty_data_count: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames received in current
    /// sliding window. Half-decays with [`Self::maybe_reset_window`] like other
    /// rate counters. Increments on non-zero stream-0 WINDOW_UPDATEs only —
    /// zero-increment frames short-circuit into GOAWAY(PROTOCOL_ERROR) per
    /// RFC 9113 §6.9 before reaching this counter.
    pub(super) window_update_stream0_count: u32,
    /// CONTINUATION frames received for current header block (CVE-2024-27316).
    /// Zeroed by [`Self::reset_continuation`]; not subject to window decay.
    pub(super) continuation_count: u32,
    /// Total accumulated header block size across CONTINUATION frames.
    /// Zeroed by [`Self::reset_continuation`]; not subject to window decay.
    pub(super) accumulated_header_size: u32,
    /// General anomaly counter (windowed; half-decays like the rate counters)
    pub(super) glitch_count: u32,
    /// Window start for rate-based counters
    pub(super) window_start: Instant,
    /// Configurable thresholds for flood detection
    pub(super) config: H2FloodConfig,
}
1013
1014impl Default for H2FloodDetector {
1015 fn default() -> Self {
1016 Self::new(H2FloodConfig::default())
1017 }
1018}
1019
1020impl H2FloodDetector {
1021 pub fn new(config: H2FloodConfig) -> Self {
1022 // Pre-condition: thresholds are already validated (clamped to >= 1 by
1023 // `H2FloodConfig::new`). A zero per-window threshold would trip on the
1024 // first counted frame; assert it here so a config that bypassed `new`
1025 // (raw struct literal in a future caller) is caught in debug.
1026 debug_assert!(
1027 config.max_rst_stream_per_window >= 1
1028 && config.max_ping_per_window >= 1
1029 && config.max_settings_per_window >= 1
1030 && config.max_continuation_frames >= 1
1031 && config.max_glitch_count >= 1,
1032 "flood detector must be constructed with validated (>= 1) thresholds"
1033 );
1034 Self {
1035 rst_stream_count: 0,
1036 total_rst_received_lifetime: 0,
1037 total_abusive_rst_received_lifetime: 0,
1038 total_rst_streams_emitted_lifetime: 0,
1039 ping_count: 0,
1040 total_ping_received_lifetime: 0,
1041 settings_count: 0,
1042 total_settings_received_lifetime: 0,
1043 empty_data_count: 0,
1044 window_update_stream0_count: 0,
1045 continuation_count: 0,
1046 accumulated_header_size: 0,
1047 glitch_count: 0,
1048 window_start: Instant::now(),
1049 config,
1050 }
1051 }
1052
1053 /// Increment the lifetime RST_STREAM counters and return a
1054 /// [`H2FloodViolation`] if either the global or the abusive
1055 /// (pre-response-start) lifetime cap has been exceeded.
1056 ///
1057 /// `response_started` indicates whether the backend response had already
1058 /// begun when the RST arrived; `false` is the cheap-for-client /
1059 /// expensive-for-us Rapid Reset signature (CVE-2023-44487).
1060 pub fn record_rst_lifetime(&mut self, response_started: bool) -> Option<H2FloodViolation> {
1061 let total_before = self.total_rst_received_lifetime;
1062 let abusive_before = self.total_abusive_rst_received_lifetime;
1063 self.total_rst_received_lifetime = self.total_rst_received_lifetime.saturating_add(1);
1064 if !response_started {
1065 self.total_abusive_rst_received_lifetime =
1066 self.total_abusive_rst_received_lifetime.saturating_add(1);
1067 }
1068 // Monotonicity: the global lifetime counter advances by one per call
1069 // (until saturation), and the abusive sub-counter advances iff the RST
1070 // arrived before the backend response started. The abusive counter can
1071 // never exceed the global one — every abusive RST is also a received RST.
1072 debug_assert!(
1073 self.total_rst_received_lifetime >= total_before,
1074 "lifetime RST counter must be monotonic non-decreasing"
1075 );
1076 debug_assert_eq!(
1077 self.total_abusive_rst_received_lifetime > abusive_before,
1078 !response_started,
1079 "abusive RST counter advances iff the RST is pre-response-start"
1080 );
1081 debug_assert!(
1082 self.total_abusive_rst_received_lifetime <= self.total_rst_received_lifetime,
1083 "abusive RST count is a subset of total received RST count"
1084 );
1085 if self.total_rst_received_lifetime > self.config.max_rst_stream_lifetime {
1086 return Some(H2FloodViolation {
1087 error: H2Error::EnhanceYourCalm,
1088 reason: "Rapid Reset: lifetime RST_STREAM",
1089 metric_key: "h2.flood.violation.rst_stream_lifetime",
1090 count: self.total_rst_received_lifetime,
1091 threshold: self.config.max_rst_stream_lifetime,
1092 });
1093 }
1094 if self.total_abusive_rst_received_lifetime > self.config.max_rst_stream_abusive_lifetime {
1095 return Some(H2FloodViolation {
1096 error: H2Error::EnhanceYourCalm,
1097 reason: "Rapid Reset: lifetime pre-response RST_STREAM",
1098 metric_key: "h2.flood.violation.rst_stream_pre_response_lifetime",
1099 count: self.total_abusive_rst_received_lifetime,
1100 threshold: self.config.max_rst_stream_abusive_lifetime,
1101 });
1102 }
1103 None
1104 }
1105
1106 /// Increment the lifetime **server-emitted** RST_STREAM counter and
1107 /// return a [`H2FloodViolation`] once the configured ceiling is exceeded.
1108 ///
1109 /// Call sites are the error paths inside [`ConnectionH2::reset_stream`]
1110 /// where an attacker-crafted frame coerces the server into emitting a
1111 /// RST_STREAM (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
1112 /// are reported — callers must exclude graceful cancels.
1113 pub fn record_rst_emitted(&mut self) -> Option<H2FloodViolation> {
1114 let before = self.total_rst_streams_emitted_lifetime;
1115 self.total_rst_streams_emitted_lifetime =
1116 self.total_rst_streams_emitted_lifetime.saturating_add(1);
1117 // Monotonic: the emitted-RST counter never decays (it is the absolute
1118 // MadeYouReset ceiling, CVE-2025-8671), so each call strictly advances
1119 // it until u64 saturation.
1120 debug_assert!(
1121 self.total_rst_streams_emitted_lifetime > before || before == u64::MAX,
1122 "emitted-RST lifetime counter must advance (or already be saturated)"
1123 );
1124 if self.total_rst_streams_emitted_lifetime > self.config.max_rst_stream_emitted_lifetime {
1125 return Some(H2FloodViolation {
1126 error: H2Error::EnhanceYourCalm,
1127 reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
1128 metric_key: "h2.flood.violation.rst_stream_emitted_lifetime",
1129 count: self.total_rst_streams_emitted_lifetime,
1130 threshold: self.config.max_rst_stream_emitted_lifetime,
1131 });
1132 }
1133 None
1134 }
1135
1136 /// Half-decay rate-based counters if the current window has expired.
1137 /// Uses half-window decay instead of full reset to catch burst-then-wait attacks.
1138 fn maybe_reset_window(&mut self) {
1139 if self.window_start.elapsed() >= FLOOD_WINDOW_DURATION {
1140 let (rst_before, ping_before, settings_before) =
1141 (self.rst_stream_count, self.ping_count, self.settings_count);
1142 let (empty_before, wu0_before, glitch_before) = (
1143 self.empty_data_count,
1144 self.window_update_stream0_count,
1145 self.glitch_count,
1146 );
1147 self.rst_stream_count /= 2;
1148 self.ping_count /= 2;
1149 self.settings_count /= 2;
1150 self.empty_data_count /= 2;
1151 self.window_update_stream0_count /= 2;
1152 self.glitch_count /= 2;
1153 self.window_start = Instant::now();
1154 // Half-decay invariant: each rate-based counter is exactly halved
1155 // (integer division), never increased. Catching burst-then-wait
1156 // attacks relies on the counter shrinking but not vanishing — a
1157 // full reset would let a patient attacker reset to zero each window.
1158 debug_assert_eq!(self.rst_stream_count, rst_before / 2, "RST count halves");
1159 debug_assert_eq!(self.ping_count, ping_before / 2, "PING count halves");
1160 debug_assert_eq!(
1161 self.settings_count,
1162 settings_before / 2,
1163 "SETTINGS count halves"
1164 );
1165 debug_assert_eq!(
1166 self.empty_data_count,
1167 empty_before / 2,
1168 "empty-DATA count halves"
1169 );
1170 debug_assert_eq!(
1171 self.window_update_stream0_count,
1172 wu0_before / 2,
1173 "stream-0 WINDOW_UPDATE count halves"
1174 );
1175 debug_assert_eq!(self.glitch_count, glitch_before / 2, "glitch count halves");
1176 // The lifetime counters are deliberately NOT touched here — they are
1177 // the never-decaying ceilings. Guard against a future edit decaying
1178 // them by accident.
1179 debug_assert!(
1180 self.window_start.elapsed() < FLOOD_WINDOW_DURATION,
1181 "window_start must be refreshed to (approximately) now after decay"
1182 );
1183 }
1184 }
1185
1186 /// Check all flood counters. Returns a [`H2FloodViolation`] when a threshold
1187 /// is exceeded; the caller is responsible for logging with session context
1188 /// and escalating to GOAWAY.
1189 pub fn check_flood(&mut self) -> Option<H2FloodViolation> {
1190 self.maybe_reset_window();
1191
1192 fn flag(
1193 reason: &'static str,
1194 metric_key: &'static str,
1195 count: u32,
1196 threshold: u32,
1197 ) -> Option<H2FloodViolation> {
1198 if count > threshold {
1199 Some(H2FloodViolation {
1200 error: H2Error::EnhanceYourCalm,
1201 reason,
1202 metric_key,
1203 count: count as u64,
1204 threshold: threshold as u64,
1205 })
1206 } else {
1207 None
1208 }
1209 }
1210
1211 let violation = flag(
1212 "RST_STREAM",
1213 "h2.flood.violation.rst_stream_window",
1214 self.rst_stream_count,
1215 self.config.max_rst_stream_per_window,
1216 )
1217 .or_else(|| {
1218 flag(
1219 "PING",
1220 "h2.flood.violation.ping_window",
1221 self.ping_count,
1222 self.config.max_ping_per_window,
1223 )
1224 })
1225 .or_else(|| {
1226 flag(
1227 "PING lifetime",
1228 "h2.flood.violation.ping_lifetime",
1229 self.total_ping_received_lifetime,
1230 DEFAULT_MAX_PING_LIFETIME,
1231 )
1232 })
1233 .or_else(|| {
1234 flag(
1235 "SETTINGS",
1236 "h2.flood.violation.settings_window",
1237 self.settings_count,
1238 self.config.max_settings_per_window,
1239 )
1240 })
1241 .or_else(|| {
1242 flag(
1243 "SETTINGS lifetime",
1244 "h2.flood.violation.settings_lifetime",
1245 self.total_settings_received_lifetime,
1246 DEFAULT_MAX_SETTINGS_LIFETIME,
1247 )
1248 })
1249 .or_else(|| {
1250 flag(
1251 "empty DATA",
1252 "h2.flood.violation.empty_data_window",
1253 self.empty_data_count,
1254 self.config.max_empty_data_per_window,
1255 )
1256 })
1257 .or_else(|| {
1258 flag(
1259 "CONTINUATION",
1260 "h2.flood.violation.continuation_per_block",
1261 self.continuation_count,
1262 self.config.max_continuation_frames,
1263 )
1264 })
1265 .or_else(|| {
1266 flag(
1267 "WINDOW_UPDATE stream 0",
1268 "h2.flood.violation.window_update_stream0_window",
1269 self.window_update_stream0_count,
1270 self.config.max_window_update_stream0_per_window,
1271 )
1272 })
1273 .or_else(|| {
1274 flag(
1275 "accumulated header size",
1276 "h2.flood.violation.header_size_per_block",
1277 self.accumulated_header_size,
1278 self.config.max_header_list_size,
1279 )
1280 })
1281 .or_else(|| {
1282 flag(
1283 "glitch",
1284 "h2.flood.violation.glitch_window",
1285 self.glitch_count,
1286 self.config.max_glitch_count,
1287 )
1288 });
1289 // Post-condition: any reported violation is well-formed — every H2
1290 // flood escalation is an ENHANCE_YOUR_CALM connection error, and the
1291 // observed count strictly exceeds the threshold it tripped (the `flag`
1292 // helper and the lifetime checks all use strict `>`). A violation whose
1293 // count <= threshold would be a false positive terminating a healthy
1294 // connection.
1295 debug_assert!(
1296 violation
1297 .as_ref()
1298 .is_none_or(|v| v.error == H2Error::EnhanceYourCalm && v.count > v.threshold),
1299 "a flood violation must be EnhanceYourCalm with count strictly above threshold"
1300 );
1301 violation
1302 }
1303
1304 /// Reset CONTINUATION-specific counters when a header block is complete.
1305 pub fn reset_continuation(&mut self) {
1306 self.continuation_count = 0;
1307 self.accumulated_header_size = 0;
1308 // Post-condition: both CONTINUATION-block accumulators are cleared so
1309 // the next header block starts from zero (CVE-2024-27316 per-block
1310 // accounting must not leak across blocks).
1311 debug_assert_eq!(
1312 self.continuation_count, 0,
1313 "continuation_count must be zero after a block completes"
1314 );
1315 debug_assert_eq!(
1316 self.accumulated_header_size, 0,
1317 "accumulated_header_size must be zero after a block completes"
1318 );
1319 }
1320}
1321
/// State of an H2 connection's frame-processing state machine.
///
/// NOTE(review): the original carries no documentation here. The per-variant
/// notes below are inferred from the variant names and payload types only —
/// confirm them against the code that drives the transitions.
#[derive(Debug)]
pub enum H2State {
    /// Presumably: waiting for the client connection preface.
    ClientPreface,
    /// Presumably: waiting for the client's initial SETTINGS frame.
    ClientSettings,
    /// Presumably: waiting for the server's initial SETTINGS frame.
    ServerSettings,
    /// Presumably: waiting for the next frame header.
    Header,
    /// Carries the [`FrameHeader`] of the frame currently being handled.
    Frame(FrameHeader),
    /// Carries the [`Headers`] of a header block being continued; presumably
    /// waiting for the header of the next CONTINUATION frame.
    ContinuationHeader(Headers),
    /// Carries the [`Headers`] of a header block being continued; presumably
    /// waiting for the payload of a CONTINUATION frame.
    ContinuationFrame(Headers),
    /// Presumably: a GOAWAY has been exchanged and the connection is closing.
    GoAway,
    /// Presumably: the connection hit an unrecoverable error.
    Error,
    /// Presumably: incoming bytes are being discarded.
    Discard,
}
1335
/// One set of HTTP/2 SETTINGS values.
///
/// Field names mirror the SETTINGS parameter identifiers; the RFC 9113
/// parameters are presumably those of §6.5.2 (verify), and the last two
/// fields come from the extensions named on them. Starting values are
/// provided by the `Default` implementation.
#[derive(Debug, Clone, Copy)]
pub struct H2Settings {
    /// SETTINGS_HEADER_TABLE_SIZE.
    pub settings_header_table_size: u32,
    /// SETTINGS_ENABLE_PUSH.
    pub settings_enable_push: bool,
    /// SETTINGS_MAX_CONCURRENT_STREAMS.
    pub settings_max_concurrent_streams: u32,
    /// SETTINGS_INITIAL_WINDOW_SIZE.
    pub settings_initial_window_size: u32,
    /// SETTINGS_MAX_FRAME_SIZE.
    pub settings_max_frame_size: u32,
    /// SETTINGS_MAX_HEADER_LIST_SIZE.
    pub settings_max_header_list_size: u32,
    /// RFC 8441
    pub settings_enable_connect_protocol: bool,
    /// RFC 9218
    pub settings_no_rfc7540_priorities: bool,
}
1349
1350impl Default for H2Settings {
1351 fn default() -> Self {
1352 Self {
1353 settings_header_table_size: DEFAULT_HEADER_TABLE_SIZE,
1354 settings_enable_push: false,
1355 settings_max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
1356 settings_initial_window_size: DEFAULT_INITIAL_WINDOW_SIZE,
1357 settings_max_frame_size: DEFAULT_MAX_FRAME_SIZE,
1358 settings_max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
1359 settings_enable_connect_protocol: false,
1360 settings_no_rfc7540_priorities: true,
1361 }
1362 }
1363}
1364
/// RFC 9218 Extensible Priorities for HTTP stream scheduling.
///
/// Stores per-stream urgency (0-7, lower = more important) and incremental
/// flag. Used by `writable()` to sort streams: lower urgency first, then
/// stream ID for stability among same-urgency non-incremental streams.
///
/// Within a same-urgency bucket the scheduler (see
/// [`ConnectionH2::write_streams`]) drains non-incremental streams
/// sequentially, then applies RFC 9218 §4 round-robin to the incremental
/// streams starting from [`Self::incremental_cursor`], so multiple concurrent
/// downloads at the same urgency interleave their DATA frames fairly.
///
/// Streams without an explicit `priority` header get the RFC 9218 defaults:
/// urgency 3, incremental false.
///
/// The derived `Default` starts with an empty priority map and the cursor at
/// its default value (the `0` "no cursor yet" sentinel, assuming `StreamId`
/// is an integer type — confirm).
#[derive(Default)]
pub struct Prioriser {
    /// Per-stream priority: stream_id -> (urgency 0-7, incremental flag).
    /// The number of entries is capped at `MAX_PRIORITIES`.
    priorities: HashMap<StreamId, (u8, bool)>,
    /// RFC 9218 §4 round-robin cursor: stream ID that fired first in the
    /// last write pass over the incremental tail of the lowest-urgency
    /// bucket that contained at least one incremental stream. The next pass
    /// starts from the stream immediately after this ID (wrapping around),
    /// so a single slow-draining stream cannot hog the connection.
    ///
    /// `0` is the "no cursor yet" sentinel and means "start from the
    /// smallest ID in the bucket" — H2 stream IDs are always > 0.
    incremental_cursor: StreamId,
}
1393
/// RFC 9218 §4 default urgency value, applied to streams that carry no
/// explicit priority (urgency ranges over 0-7, lower = more important).
const DEFAULT_URGENCY: u8 = 3;

/// Maximum entries in the priority map to prevent flooding via PRIORITY frames.
/// Once the map holds this many entries, priorities for stream ids that are
/// not already present are not recorded; existing entries can still be updated.
const MAX_PRIORITIES: usize = 4096;

/// Small look-ahead window (in stream IDs) for PRIORITY frames that arrive
/// slightly before the peer opens the corresponding stream. RFC 9218 allows
/// PRIORITY to be sent for an idle stream that the peer intends to open
/// soon. Past this budget we assume the ID will never be used and drop the
/// entry, preventing flooding with far-future stream IDs.
const PRIORITY_IDLE_LOOKAHEAD: u32 = 64;
1406
1407impl Prioriser {
1408 /// Record or update the priority for a stream that we know exists or are
1409 /// currently processing (used from pkawa's header-handling path where the
1410 /// owning stream's HEADERS frame is being decoded).
1411 ///
1412 /// Returns `true` if the priority is invalid (self-dependency for RFC 7540),
1413 /// signalling the caller should reset the stream with a protocol error.
1414 pub fn push_priority(&mut self, stream_id: StreamId, priority: parser::PriorityPart) -> bool {
1415 trace!(
1416 "{} PRIORITY REQUEST FOR {}: {:?}",
1417 log_module_context!(),
1418 stream_id,
1419 priority
1420 );
1421 // Pre-condition: the priority map never grows past MAX_PRIORITIES.
1422 // The cap is the only thing standing between a PRIORITY flood and
1423 // unbounded memory; assert it holds on entry (each insert path below
1424 // either updates an existing key or is gated by this check).
1425 debug_assert!(
1426 self.priorities.len() <= MAX_PRIORITIES,
1427 "priority map must never exceed MAX_PRIORITIES entries"
1428 );
1429 // Cap the priority map to prevent flooding via PRIORITY frames
1430 if !self.priorities.contains_key(&stream_id) && self.priorities.len() >= MAX_PRIORITIES {
1431 return false;
1432 }
1433 match priority {
1434 parser::PriorityPart::Rfc7540 {
1435 stream_dependency,
1436 weight: _,
1437 } => {
1438 // RFC 9113 §5.3.1: a stream cannot depend on itself; signal
1439 // the caller to RST_STREAM with PROTOCOL_ERROR. Otherwise the
1440 // RFC 7540 priority tree is deprecated and silently ignored.
1441 stream_dependency.stream_id == stream_id
1442 }
1443 parser::PriorityPart::Rfc9218 {
1444 urgency,
1445 incremental,
1446 } => {
1447 // RFC 9218 §7.1: a malformed or out-of-range priority field
1448 // MUST be "treated as absent", NOT as a stream error. Clamping
1449 // an urgency > 7 to 7 is the policy-correct interpretation:
1450 // the field is still present (so defaulting would lose
1451 // information) but its value is normalised to the RFC's
1452 // allowed range [0..=7]. Intentionally not PROTOCOL_ERROR.
1453 self.priorities
1454 .insert(stream_id, (urgency.min(7), incremental));
1455 // Post-conditions: the entry now exists with a clamped urgency
1456 // in [0, 7] (the writable scheduler buckets by urgency and would
1457 // mis-order on a value above 7), and the map stays within its
1458 // memory cap.
1459 debug_assert!(
1460 self.priorities
1461 .get(&stream_id)
1462 .is_some_and(|(u, _)| *u <= 7),
1463 "stored RFC 9218 urgency must be clamped to [0, 7]"
1464 );
1465 debug_assert!(
1466 self.priorities.len() <= MAX_PRIORITIES,
1467 "priority map must stay within MAX_PRIORITIES after insert"
1468 );
1469 false
1470 }
1471 }
1472 }
1473
1474 /// Record or update the priority for a stream ID that arrived via a
1475 /// standalone PRIORITY frame.
1476 ///
1477 /// Pass 3 Medium #4: without this guard, a peer could send PRIORITY for
1478 /// arbitrary stream IDs (e.g. 2^31 ever-increasing IDs) and pin up to
1479 /// `MAX_PRIORITIES` entries of memory. Accept only:
1480 /// - an ID that corresponds to a currently-open stream (`open_streams`);
1481 /// - an idle ID slightly ahead of `last_stream_id` (within
1482 /// [`PRIORITY_IDLE_LOOKAHEAD`]), matching RFC 9218's "set priority for
1483 /// a stream about to be opened" pattern.
1484 ///
1485 /// IDs in the past that we do not currently track (already closed) and
1486 /// IDs too far in the future are silently dropped. The `MAX_PRIORITIES`
1487 /// ceiling is preserved as a defensive backstop if both filters are ever
1488 /// circumvented.
1489 ///
1490 /// Returns the same value semantics as [`Self::push_priority`].
1491 pub fn push_priority_guarded(
1492 &mut self,
1493 stream_id: StreamId,
1494 priority: parser::PriorityPart,
1495 last_stream_id: StreamId,
1496 open_streams: &HashMap<StreamId, GlobalStreamId>,
1497 ) -> bool {
1498 if !self.is_acceptable(stream_id, last_stream_id, open_streams) {
1499 trace!(
1500 "{} PRIORITY dropped for unknown/far stream {} (last_stream_id={})",
1501 log_module_context!(),
1502 stream_id,
1503 last_stream_id
1504 );
1505 return false;
1506 }
1507 self.push_priority(stream_id, priority)
1508 }
1509
1510 fn is_acceptable(
1511 &self,
1512 stream_id: StreamId,
1513 last_stream_id: StreamId,
1514 open_streams: &HashMap<StreamId, GlobalStreamId>,
1515 ) -> bool {
1516 if open_streams.contains_key(&stream_id) {
1517 return true;
1518 }
1519 // Idle stream ahead of the current counter: accept a small look-ahead.
1520 // Past IDs that are NOT in `open_streams` are closed — drop them.
1521 let upper = last_stream_id.saturating_add(PRIORITY_IDLE_LOOKAHEAD);
1522 stream_id > last_stream_id && stream_id <= upper
1523 }
1524
1525 /// Remove a stream's priority entry (called when the stream is recycled).
1526 pub fn remove(&mut self, stream_id: &StreamId) {
1527 let had = self.priorities.contains_key(stream_id);
1528 let before = self.priorities.len();
1529 self.priorities.remove(stream_id);
1530 // Post-conditions: the entry is truly gone, and the map shrinks by
1531 // exactly one iff it was present. A leak here re-introduces the
1532 // PRIORITY-flood memory exposure the cap defends against.
1533 debug_assert!(
1534 !self.priorities.contains_key(stream_id),
1535 "remove must evict the priority entry"
1536 );
1537 debug_assert_eq!(
1538 self.priorities.len(),
1539 before - had as usize,
1540 "priority map length drops by exactly one iff the id was present"
1541 );
1542 }
1543
1544 /// Look up the priority for a stream, returning RFC 9218 defaults if absent.
1545 #[inline]
1546 pub fn get(&self, stream_id: &StreamId) -> (u8, bool) {
1547 self.priorities
1548 .get(stream_id)
1549 .copied()
1550 .unwrap_or((DEFAULT_URGENCY, false))
1551 }
1552
1553 /// Reorder a pre-sorted slice of writable stream IDs so that inside each
1554 /// urgency bucket, incremental streams appear after non-incremental ones,
1555 /// and the incremental tail is rotated by [`Self::incremental_cursor`]
1556 /// (RFC 9218 §4).
1557 ///
1558 /// The input `buf` must already be sorted by `(urgency, stream_id)`:
1559 /// this routine only partitions and rotates inside same-urgency
1560 /// contiguous runs, it does not re-sort.
1561 ///
1562 /// Returns the total number of incremental streams seen, so callers that
1563 /// need to update the cursor at the end of the write pass can early-exit
1564 /// when the count is zero.
1565 pub fn apply_incremental_rotation(&self, buf: &mut [StreamId]) -> usize {
1566 // Pre-condition: callers must hand a slice already sorted by urgency so
1567 // same-urgency runs are contiguous (this routine only partitions/rotates
1568 // within a run, it does not re-sort across urgencies). A non-monotonic
1569 // urgency sequence would split one logical bucket into several and
1570 // mis-schedule the round-robin. `windows(2)` over a slice of size N is
1571 // dead code in release.
1572 #[cfg(debug_assertions)]
1573 debug_assert!(
1574 buf.windows(2)
1575 .all(|w| self.get(&w[0]).0 <= self.get(&w[1]).0),
1576 "apply_incremental_rotation requires input pre-sorted by urgency"
1577 );
1578 let len_before = buf.len();
1579 #[cfg(debug_assertions)]
1580 let expected_incremental = buf.iter().filter(|id| self.get(id).1).count();
1581 let mut total_incremental = 0usize;
1582 let mut i = 0;
1583 while i < buf.len() {
1584 let (urgency_i, _) = self.get(&buf[i]);
1585 let mut j = i + 1;
1586 while j < buf.len() {
1587 let (urgency_j, _) = self.get(&buf[j]);
1588 if urgency_j != urgency_i {
1589 break;
1590 }
1591 j += 1;
1592 }
1593 // `buf[i..j]` is a contiguous run of same-urgency stream IDs.
1594 let bucket = &mut buf[i..j];
1595 if bucket.len() > 1 {
1596 // Stable partition: non-incremental first, incremental last,
1597 // each subrange staying in ascending stream-id order.
1598 bucket.sort_by_key(|id| self.get(id).1);
1599 let split = bucket.partition_point(|id| !self.get(id).1);
1600 let incremental_tail = &mut bucket[split..];
1601 if incremental_tail.len() > 1 {
1602 // Rotate so the pass starts right after the stream that
1603 // fired first previously. `partition_point` returns the
1604 // first index whose stream ID > cursor (so cursor itself
1605 // is still drained, but after the streams ahead of it).
1606 let start =
1607 incremental_tail.partition_point(|id| *id <= self.incremental_cursor);
1608 incremental_tail.rotate_left(start);
1609 }
1610 total_incremental += incremental_tail.len();
1611 } else if bucket.len() == 1 && self.get(&bucket[0]).1 {
1612 total_incremental += 1;
1613 }
1614 i = j;
1615 }
1616 // Post-conditions: the routine is a permutation — it reorders in place
1617 // and never drops a stream id (len unchanged), and the returned count is
1618 // exactly the number of incremental streams present (the cursor-advance
1619 // callers rely on this being the true incremental-tail size).
1620 debug_assert_eq!(
1621 buf.len(),
1622 len_before,
1623 "rotation must preserve the slice (no streams dropped or added)"
1624 );
1625 #[cfg(debug_assertions)]
1626 debug_assert_eq!(
1627 total_incremental, expected_incremental,
1628 "reported incremental count must equal the incremental streams in buf"
1629 );
1630 total_incremental
1631 }
1632
1633 /// Advance the RFC 9218 §4 round-robin cursor after a write pass.
1634 ///
1635 /// `first_incremental_fired` is the stream ID that headed the incremental
1636 /// tail we just drained; the next pass will start at the next stream
1637 /// after that ID. Callers may pass `None` when no incremental streams
1638 /// were eligible, leaving the cursor where it was.
1639 pub fn advance_incremental_cursor(&mut self, first_incremental_fired: Option<StreamId>) {
1640 if let Some(id) = first_incremental_fired {
1641 self.incremental_cursor = id;
1642 }
1643 }
1644}
1645
/// Connection-level flow control state (RFC 9113 §6.9).
///
/// `ConnectionH2::new` initialises it with `window` set to
/// `DEFAULT_INITIAL_WINDOW_SIZE`, a zero receive counter and an empty
/// pending-update map.
pub struct H2FlowControl {
    /// Connection-level send window (can go negative per RFC 9113 §6.9.2).
    /// Signed for that reason.
    pub window: i32,
    /// Bytes received since last connection-level WINDOW_UPDATE.
    pub received_bytes_since_update: u32,
    /// Queued stream_id -> accumulated increment for WINDOW_UPDATE frames (O(1) coalescing).
    /// NOTE(review): key `0` presumably stands for the connection-level
    /// window — confirm against the code that fills this map.
    pub pending_window_updates: HashMap<u32, u32>,
}
1655
/// Byte accounting for connection overhead attribution.
///
/// All three counters start at zero in `ConnectionH2::new`.
pub struct H2ByteAccounting {
    /// Bytes read on the zero stream not yet attributed to a stream.
    pub zero_bytes_read: usize,
    /// Overhead bytes received (connection-level frames).
    pub overhead_bin: usize,
    /// Overhead bytes sent (connection-level frames).
    pub overhead_bout: usize,
}
1665
/// Connection draining state for graceful shutdown.
///
/// A fresh connection starts with `draining = false`, no peer GOAWAY
/// recorded and no drain start time; only `graceful_shutdown_deadline` is
/// supplied by the constructor's caller.
pub struct H2DrainState {
    /// True when we've sent GOAWAY and are draining.
    /// While set, the frame-header path refuses new peer-initiated streams
    /// with `REFUSED_STREAM` instead of creating them.
    pub draining: bool,
    /// Last stream ID from peer's GOAWAY (for retry decisions).
    pub peer_last_stream_id: Option<StreamId>,
    /// Wall-clock timestamp captured the first time this connection entered
    /// `draining` during soft-stop. Used together with
    /// [`Self::graceful_shutdown_deadline`] to decide when to force-close.
    /// Remains `None` until the proxy-initiated drain begins (peer-initiated
    /// drains via `handle_goaway_frame` don't arm the forced-close timer —
    /// the caller in `Mux::shutting_down` is the only writer).
    pub started_at: Option<Instant>,
    /// Wall-clock budget granted to in-flight streams after the initial
    /// `GOAWAY(NO_ERROR)`. `None` means "wait indefinitely" (knob value `0`).
    /// Default when unset upstream: 5 s (see `L7ListenerHandler`).
    pub graceful_shutdown_deadline: Option<std::time::Duration>,
}
1684
/// State of one HTTP/2 connection (frontend or backend side, depending on
/// [`Self::position`]): HPACK codecs, local and peer settings, the read/write
/// state machine, flow control, drain state and the abuse-mitigation
/// counters. Per-stream payload storage is not held here; `streams` only maps
/// wire stream IDs to entries of the shared `Context`.
pub struct ConnectionH2<Front: SocketHandler> {
    /// Connection/session ULID propagated from the parent [`Mux`]. Used to
    /// stamp the session slot of the `[session req cluster backend]` log
    /// prefix emitted by this module's `log_context!` / `log_context_stream!`
    /// macros.
    pub session_ulid: Ulid,
    /// HPACK decoder for header blocks received from the peer. The
    /// constructor caps its dynamic table at
    /// `local_settings.settings_header_table_size`.
    pub decoder: loona_hpack::Decoder<'static>,
    /// HPACK encoder for the header blocks this side emits.
    pub encoder: loona_hpack::Encoder<'static>,
    /// What the read side is waiting for next: the target buffer and a byte
    /// count. `expect_header` sets it to `(H2StreamId::Zero, 9)` to request a
    /// 9-byte frame header.
    pub expect_read: Option<(H2StreamId, usize)>,
    /// NOTE(review): presumably the stream whose write is still in flight;
    /// `peer_gone_after_final_goaway` requires it to be `None` before it
    /// considers the connection fully flushed — confirm against the writer.
    pub expect_write: Option<H2StreamId>,
    /// Stream-ID high-water mark. On the server side a HEADERS frame only
    /// opens a new stream when its ID is strictly greater than this value.
    pub last_stream_id: StreamId,
    /// Settings this endpoint applies locally. `settings_max_concurrent_streams`
    /// is seeded from `connection_config`; `settings_max_frame_size` bounds
    /// the frame-header parser.
    pub local_settings: H2Settings,
    /// Settings attributed to the peer; starts at `H2Settings::default()`.
    pub peer_settings: H2Settings,
    /// Whether this connection acts as the server (reads client requests
    /// from the front) or as the client (reads backend responses).
    pub position: Position,
    /// RFC 9218 priority bookkeeping for the streams of this connection.
    pub prioriser: Prioriser,
    /// Event-loop readiness: the `interest` set we ask for and the `event`
    /// set last reported for the socket.
    pub readiness: Readiness,
    /// Underlying transport for this connection.
    pub socket: Front,
    /// Current step of the H2 state machine; starts at
    /// `H2State::ClientPreface`.
    pub state: H2State,
    /// Open streams: wire stream ID -> index of the stream in the shared
    /// `Context::streams` storage.
    pub streams: HashMap<StreamId, GlobalStreamId>,
    /// Timeout handle passed in by the constructor's caller.
    /// NOTE(review): presumably the connection-level idle timer — confirm.
    pub timeout_container: TimeoutContainer,
    /// Connection-level flow control state (send window, receive tracking, pending updates).
    pub flow_control: H2FlowControl,
    /// Highest stream ID accepted from the peer (used for GoAway last_stream_id).
    pub highest_peer_stream_id: StreamId,
    /// RFC 7541 §4.2 / §6.3 pending dynamic-table-size-update signal.
    ///
    /// `Some(new_size)` when a peer SETTINGS frame adjusted
    /// `SETTINGS_HEADER_TABLE_SIZE` and we have not yet prepended the
    /// matching `001xxxxx` HPACK directive to a header block. Consumed and
    /// cleared by [`H2BlockConverter::emit_pending_size_update_if_new_block`]
    /// on the next `Block::StatusLine` or `Block::Header` encoded for the
    /// connection. Until then the peer's decoder still has its previous
    /// (possibly larger) table cap, so emitting is a correctness
    /// requirement, not a nicety — see the RFC 9113 encoder-decoder
    /// synchronisation contract (§6.5.2).
    pub pending_table_size_update: Option<u32>,
    /// Reusable buffer for HPACK-encoded headers in the H2 block converter.
    pub converter_buf: Vec<u8>,
    /// Reusable buffer for lowercasing header keys in the H2 block converter.
    pub lowercase_buf: Vec<u8>,
    /// Reusable buffer for assembling cookie values in the H2 block converter.
    pub cookie_buf: Vec<u8>,
    /// Connection draining state for graceful shutdown.
    pub drain: H2DrainState,
    /// Connection-level ("stream 0") buffer, backed by a pooled buffer
    /// checked out at construction. Frame headers are parsed from its
    /// storage, and control or unknown frames are routed through it.
    pub zero: GenericHttpStream,
    /// Byte accounting for connection overhead attribution.
    pub bytes: H2ByteAccounting,
    /// Flood detector for CVE mitigations (Rapid Reset, CONTINUATION, Ping, Settings floods).
    pub flood_detector: H2FloodDetector,
    /// RFC 9113 §6.5: timestamp when we sent SETTINGS and are awaiting ACK.
    /// If the peer does not ACK within SETTINGS_ACK_TIMEOUT, we send GOAWAY
    /// with SettingsTimeout error.
    pub settings_sent_at: Option<Instant>,
    /// Queued RST_STREAM frames to send: Vec<(stream_id, error_code)>.
    /// Used when refusing streams (MAX_CONCURRENT_STREAMS, buffer exhaustion)
    /// during readable — the actual write happens in the writable preamble
    /// to avoid conflicting with kawa.storage usage for frame payload discard.
    pub pending_rst_streams: Vec<(StreamId, H2Error)>,
    /// RFC 9113 §6.8: tracks stream IDs for which RST_STREAM has already been sent,
    /// preventing duplicate RST_STREAM frames on the wire.
    pub rst_sent: HashSet<StreamId>,
    /// Lifetime counter of RST_STREAM frames queued (pending + already flushed).
    /// Used to detect sustained misbehavior even when writable() drains the
    /// pending queue between readable() calls.
    pub total_rst_streams_queued: usize,
    /// Reusable buffer for priority-sorted stream IDs in write_streams().
    /// Cleared and reused each call to avoid per-frame allocation.
    priorities_buf: Vec<StreamId>,
    /// True once we've asked rustls to emit TLS close_notify for this frontend.
    close_notify_sent: bool,
    /// Per-listener H2 connection tuning (window size, max streams, shrink ratio).
    pub connection_config: H2ConnectionConfig,
    /// Maximum pending WINDOW_UPDATE entries before dropping.
    /// Derived from `connection_config.max_concurrent_streams` at construction
    /// as `1 + max_concurrent_streams * 4`.
    max_pending_window_updates: usize,
    /// Last `(connection_window, active_streams, pending_window_updates)` snapshot
    /// emitted by [`Self::gauge_connection_state`]. The snapshot represents this
    /// connection's *contribution* to the three `h2.connection.*` aggregate
    /// gauges; each call emits the signed delta against this snapshot via
    /// [`gauge_add!`] so the gauge sums across connections.
    ///
    /// Stays `None` until the first emission. [`Drop`] applies the negative of
    /// this snapshot so the connection's contribution is always rebalanced to
    /// zero on teardown — independent of which close path runs.
    last_gauge_snapshot: Option<(usize, usize, usize)>,
    /// Per-stream wall-clock timestamp of last meaningful activity (DATA or
    /// HEADERS frame receipt). Used to cancel streams that make no forward
    /// progress within [`Self::stream_idle_timeout`] — mitigates slow-multiplex
    /// Slowloris: connection-level idle timers reset on every frame, so a
    /// misbehaving peer can otherwise pin up to `max_concurrent_streams` slots
    /// for the full nominal connection timeout.
    ///
    /// Initialized when the stream is created and refreshed on each non-empty
    /// inbound DATA frame and on HEADERS for an existing stream (trailers).
    /// Empty DATA frames (CVE-2019-9518 vector) do NOT refresh the timer.
    pub stream_last_activity_at: HashMap<StreamId, Instant>,
    /// Per-stream timestamp of when the stream first became flow-control-stalled
    /// on the OUTBOUND (response) side — it holds buffered response data it
    /// cannot drain because its effective send window `min(stream.window,
    /// connection.window)` is exhausted (the HTTP/2 window-stall /
    /// WINDOW_UPDATE-drip vector). Distinct from [`Self::stream_last_activity_at`]:
    /// this map is armed/cleared ONLY by outbound flow-control progress and is
    /// NEVER refreshed by inbound DATA/HEADERS or connection-level frames, so a
    /// peer dribbling 1-byte DATA on a stalled stream cannot keep it warm (the
    /// liveness timer alone misses this because inbound drips refresh it). Reaped
    /// by [`Self::cancel_timed_out_streams`] after [`Self::stream_idle_timeout`].
    pub stream_fc_stalled_since: HashMap<StreamId, Instant>,
    /// Cumulative outbound flow-control bytes drained on a window-stalled stream
    /// SINCE its [`Self::stream_fc_stalled_since`] deadline was armed (M2
    /// cumulative-stall budget). An entry exists IFF `stream_fc_stalled_since`
    /// has one for the stream; the two maps are kept in lockstep at every
    /// arm/clear/evict site. Closes the `WINDOW_UPDATE(+1)`-drip residual: a
    /// 1-byte drain no longer clears the deadline — only cumulative progress
    /// reaching [`FC_STALL_CLEAR_FLOOR`] does.
    pub stream_fc_stalled_progress: HashMap<StreamId, usize>,
    /// Per-stream idle cap. Streams with no activity for longer than this are
    /// RST_STREAM(CANCEL)'d by [`Self::cancel_timed_out_streams`].
    pub stream_idle_timeout: std::time::Duration,
    /// RFC 9113 §5.1.2 back-pressure: count of stream refusals
    /// (REFUSED_STREAM emitted via [`Self::refuse_stream_and_discard`]) within
    /// the current back-pressure window. When the count exceeds
    /// [`BACKPRESSURE_REFUSAL_THRESHOLD`] inside one
    /// [`BACKPRESSURE_WINDOW_DURATION`] we halve the advertised
    /// `SETTINGS_MAX_CONCURRENT_STREAMS` to signal the peer to slow down.
    refuse_count_window: u32,
    /// Start timestamp for the current back-pressure window.
    /// Initialised to the construction time.
    refuse_window_start: Instant,
    /// Set once we have halved `local_settings.settings_max_concurrent_streams`
    /// in response to a refusal burst. Prevents the cap from collapsing to 0
    /// on sustained abuse — a single halving per connection is sufficient to
    /// signal back-pressure; further bursts trigger `EnhanceYourCalm`.
    mcs_backpressure_applied: bool,
}
1818impl<Front: SocketHandler> std::fmt::Debug for ConnectionH2<Front> {
1819 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1820 f.debug_struct("ConnectionH2")
1821 .field("position", &self.position)
1822 .field("state", &self.state)
1823 .field("expect", &self.expect_read)
1824 .field("readiness", &self.readiness)
1825 .field("local_settings", &self.local_settings)
1826 .field("peer_settings", &self.peer_settings)
1827 .field("socket", &self.socket.socket_ref())
1828 .field("streams", &self.streams)
1829 .field("zero", &self.zero.storage.meter(20))
1830 .field("window", &self.flow_control.window)
1831 .field("total_rst_streams_queued", &self.total_rst_streams_queued)
1832 .finish()
1833 }
1834}
1835
/// Symmetric tear-down for the three `h2.connection.*` aggregate gauges:
/// whatever positive contribution this connection made via
/// [`ConnectionH2::gauge_connection_state`] is subtracted back out when the
/// connection is dropped.
///
/// Using `Drop` (rather than wiring decrements into every close path —
/// `graceful_goaway`, `force_disconnect`, `handle_goaway_frame`, `Mux::close`,
/// stream-id exhaustion, panic-unwind) is what guarantees the gauge is
/// arithmetically symmetric regardless of which path teardown took. Past
/// underflow incidents (commits a650ad69, d2f01ed4) have all been
/// missing-decrement bugs that `Drop` makes structurally impossible.
impl<Front: SocketHandler> Drop for ConnectionH2<Front> {
    /// Delegates to `release_connection_gauges`; no other teardown work is
    /// done here.
    fn drop(&mut self) {
        // Runs on every teardown path, so no close path has to remember to
        // rebalance the gauges itself.
        self.release_connection_gauges();
    }
}
1852
/// Identifies which buffer a read (or write) on the connection targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum H2StreamId {
    /// The connection-level buffer (`ConnectionH2::zero`). Frame headers,
    /// stream-0 control frames and unknown frame types are read through it.
    Zero,
    /// A regular stream: `id` is the wire stream ID, `gid` the index of the
    /// stream in the shared `Context::streams` storage.
    Other { id: StreamId, gid: GlobalStreamId },
}
1858
1859impl<Front: SocketHandler> ConnectionH2<Front> {
1860 fn frontend_hung_up_while_draining(&self) -> bool {
1861 matches!(self.position, Position::Server)
1862 && self.drain.draining
1863 && (self.readiness.event.is_hup() || self.readiness.event.is_error())
1864 }
1865
1866 /// Once the final GOAWAY has been queued and all streams/control frames are
1867 /// gone, a peer-side HUP/ERR means any remaining rustls backlog is no
1868 /// longer deliverable. Waiting on `socket_wants_write()` in that state can
1869 /// deadlock shutdown forever because GOAWAY disables further frame reads.
1870 fn peer_gone_after_final_goaway(&self) -> bool {
1871 self.frontend_hung_up_while_draining()
1872 && matches!(self.state, H2State::GoAway | H2State::Error)
1873 && self.streams.is_empty()
1874 && self.expect_write.is_none()
1875 && self.zero.storage.is_empty()
1876 }
1877
1878 /// Shared constructor for both server and client H2 connections.
1879 ///
1880 /// Differences between server and client are captured by the caller-provided
1881 /// `position`, `expect_read`, and `readiness_interest` parameters.
1882 #[allow(clippy::too_many_arguments)]
1883 pub(super) fn new(
1884 session_ulid: Ulid,
1885 socket: Front,
1886 position: super::Position,
1887 pool: std::rc::Weak<std::cell::RefCell<crate::pool::Pool>>,
1888 flood_config: H2FloodConfig,
1889 connection_config: H2ConnectionConfig,
1890 stream_idle_timeout: std::time::Duration,
1891 graceful_shutdown_deadline: Option<std::time::Duration>,
1892 timeout_container: crate::timer::TimeoutContainer,
1893 expect_read: Option<(H2StreamId, usize)>,
1894 readiness_interest: sozu_command::ready::Ready,
1895 ) -> Option<Self> {
1896 let buffer = pool
1897 .upgrade()
1898 .and_then(|pool| pool.borrow_mut().checkout())?;
1899 let local_settings = H2Settings {
1900 settings_max_concurrent_streams: connection_config.max_concurrent_streams,
1901 ..H2Settings::default()
1902 };
1903 let mut decoder = loona_hpack::Decoder::new();
1904 // RFC 7541 §4.2: enforce SETTINGS_HEADER_TABLE_SIZE as the upper bound
1905 // for dynamic table size updates from the peer
1906 decoder.set_max_allowed_table_size(local_settings.settings_header_table_size as usize);
1907 Some(ConnectionH2 {
1908 session_ulid,
1909 decoder,
1910 encoder: loona_hpack::Encoder::new(),
1911 expect_read,
1912 expect_write: None,
1913 last_stream_id: 0,
1914 local_settings,
1915 peer_settings: H2Settings::default(),
1916 position,
1917 prioriser: Prioriser::default(),
1918 readiness: crate::Readiness {
1919 interest: readiness_interest,
1920 event: Ready::EMPTY,
1921 },
1922 socket,
1923 state: H2State::ClientPreface,
1924 streams: std::collections::HashMap::with_capacity(8),
1925 timeout_container,
1926 flow_control: H2FlowControl {
1927 window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
1928 received_bytes_since_update: 0,
1929 pending_window_updates: HashMap::new(),
1930 },
1931 highest_peer_stream_id: 0,
1932 pending_table_size_update: None,
1933 converter_buf: Vec::new(),
1934 lowercase_buf: Vec::new(),
1935 cookie_buf: Vec::new(),
1936 drain: H2DrainState {
1937 draining: false,
1938 peer_last_stream_id: None,
1939 started_at: None,
1940 graceful_shutdown_deadline,
1941 },
1942 zero: kawa::Kawa::new(kawa::Kind::Request, kawa::Buffer::new(buffer)),
1943 bytes: H2ByteAccounting {
1944 zero_bytes_read: 0,
1945 overhead_bin: 0,
1946 overhead_bout: 0,
1947 },
1948 flood_detector: H2FloodDetector::new(flood_config),
1949 settings_sent_at: None,
1950 pending_rst_streams: Vec::new(),
1951 rst_sent: std::collections::HashSet::new(),
1952 total_rst_streams_queued: 0,
1953 priorities_buf: Vec::new(),
1954 close_notify_sent: false,
1955 max_pending_window_updates: 1 + connection_config.max_concurrent_streams as usize * 4,
1956 connection_config,
1957 last_gauge_snapshot: None,
1958 stream_last_activity_at: HashMap::new(),
1959 stream_fc_stalled_since: HashMap::new(),
1960 stream_fc_stalled_progress: HashMap::new(),
1961 stream_idle_timeout,
1962 refuse_count_window: 0,
1963 refuse_window_start: Instant::now(),
1964 mcs_backpressure_applied: false,
1965 })
1966 }
1967
1968 /// Start TLS close_notify on the frontend and keep the session alive until
1969 /// rustls has flushed the generated records.
1970 pub fn initiate_close_notify(&mut self) -> bool {
1971 if !self.position.is_server()
1972 || matches!(
1973 self.state,
1974 H2State::ClientPreface | H2State::ClientSettings | H2State::ServerSettings
1975 )
1976 {
1977 return false;
1978 }
1979 if !self.close_notify_sent {
1980 trace!("{} H2 initiating CLOSE_NOTIFY", log_context!(self));
1981 self.socket.socket_close();
1982 self.close_notify_sent = true;
1983 }
1984 if self.socket.socket_wants_write() {
1985 self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
1986 self.ensure_tls_flushed();
1987 true
1988 } else {
1989 false
1990 }
1991 }
1992
1993 fn expect_header(&mut self) {
1994 self.state = H2State::Header;
1995 self.expect_read = Some((H2StreamId::Zero, 9));
1996 }
1997
1998 /// Process the `H2State::Header` state: parse a 9-byte frame header from
1999 /// `self.zero`, validate the stream, create new streams if needed, and
2000 /// transition to `H2State::Frame` for the payload.
2001 ///
2002 /// Returns `MuxResult` — the caller should propagate the result directly.
2003 fn handle_header_state<L>(&mut self, context: &mut Context<L>) -> MuxResult
2004 where
2005 L: ListenerHandler + L7ListenerHandler,
2006 {
2007 let i = self.zero.storage.data();
2008 trace!("{} header: {:?}", log_context!(self), i);
2009 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2010 Ok((_, header)) => {
2011 trace!("{} {:#?}", log_context!(self), header);
2012 self.zero.storage.clear();
2013 let stream_id = header.stream_id;
2014 // RFC 9113 §6.10: CONTINUATION frames MUST be preceded by a
2015 // HEADERS or PUSH_PROMISE frame without END_HEADERS. When we
2016 // reach `handle_header_state`, we are between frames and no
2017 // header block is in progress (otherwise the state would be
2018 // `H2State::ContinuationHeader`). A CONTINUATION frame arriving
2019 // here is therefore standalone and MUST be treated as a
2020 // connection error of type PROTOCOL_ERROR.
2021 if header.frame_type == FrameType::Continuation {
2022 error!(
2023 "{} standalone CONTINUATION frame on stream {} without preceding HEADERS",
2024 log_context!(self),
2025 stream_id
2026 );
2027 return self.goaway(H2Error::ProtocolError);
2028 }
2029 // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
2030 // Route unknown frames (and any stream_id == 0 control frame)
2031 // through stream 0 (the connection-level buffer) so
2032 // `handle_frame` can drop them without touching stream state.
2033 let read_stream = if stream_id == 0
2034 || matches!(header.frame_type, FrameType::Unknown(_))
2035 {
2036 H2StreamId::Zero
2037 } else if let Some(global_stream_id) = self.streams.get(&stream_id) {
2038 let allowed_on_half_closed = header.frame_type == FrameType::WindowUpdate
2039 || header.frame_type == FrameType::Priority
2040 || header.frame_type == FrameType::RstStream;
2041 let stream = &context.streams[*global_stream_id];
2042 // Use the position-aware end_of_stream flag:
2043 // - Server reads from front (client requests)
2044 // - Client reads from back (backend responses)
2045 let received_eos = if self.position.is_server() {
2046 stream.front_received_end_of_stream
2047 } else {
2048 stream.back_received_end_of_stream
2049 };
2050 trace!(
2051 "{} REQUESTING EXISTING STREAM {}: {}/{:?}",
2052 log_context!(self),
2053 stream_id,
2054 received_eos,
2055 stream.state
2056 );
2057 if !allowed_on_half_closed && (received_eos || !stream.state.is_open()) {
2058 error!(
2059 "{} CANNOT RECEIVE {:?} ON THIS STREAM {:?}",
2060 log_context!(self),
2061 header.frame_type,
2062 stream.state
2063 );
2064 return self.goaway(H2Error::StreamClosed);
2065 }
2066 // RFC 9113 §8.1: a HEADERS frame received in the body
2067 // phase is a trailer block and MUST carry END_STREAM. This
2068 // closes the request-smuggling primitive where a peer sends
2069 // HEADERS, DATA, HEADERS (no END_STREAM) to chain header
2070 // blocks on the same stream ID.
2071 //
2072 // Discriminate from the read-side Kawa parsing phase rather
2073 // than stream existence: on Position::Client the stream is
2074 // created when we send the request to the backend, so the
2075 // initial backend response HEADERS legitimately arrives on
2076 // an existing stream. Similarly, 1xx→final transitions on
2077 // either side may yield multiple HEADERS frames before the
2078 // body begins (kawa clears back to initial / terminated on
2079 // 1xx; neither is main_phase). Only HEADERS arriving once
2080 // the read side has transitioned to Body/Chunks parsing —
2081 // i.e. after headers were fully consumed and body framing
2082 // is in progress — may be a trailer.
2083 let read_in_body = if self.position.is_server() {
2084 stream.front.is_main_phase()
2085 } else {
2086 stream.back.is_main_phase()
2087 };
2088 if header.frame_type == FrameType::Headers
2089 && read_in_body
2090 && header.flags & parser::FLAG_END_STREAM == 0
2091 {
2092 error!(
2093 "{} HEADERS without END_STREAM on open stream {} in body phase: trailers MUST carry END_STREAM",
2094 log_context!(self),
2095 stream_id
2096 );
2097 return self.goaway(H2Error::ProtocolError);
2098 }
2099 if header.frame_type == FrameType::Data {
2100 H2StreamId::Other {
2101 id: stream_id,
2102 gid: *global_stream_id,
2103 }
2104 } else {
2105 H2StreamId::Zero
2106 }
2107 } else {
2108 // RFC 9113 §5.1.1: stream identifiers MUST be strictly
2109 // increasing. Tightened from `>=` to `>` so that a peer
2110 // cannot re-use `self.last_stream_id` (which would
2111 // conflict with our own server-pushed streams if we
2112 // ever enable push in the future). For the first
2113 // request on a fresh connection `last_stream_id == 0`
2114 // and any client-initiated odd stream still passes.
2115 if header.frame_type == FrameType::Headers
2116 && self.position.is_server()
2117 && stream_id & 1 == 1
2118 && stream_id > self.last_stream_id
2119 {
2120 // RFC 9113 §6.8: after sending a GOAWAY, the proxy
2121 // MUST NOT accept new streams.
2122 // `graceful_goaway` sets `drain.draining = true`
2123 // and sends an initial GOAWAY with last_stream_id =
2124 // STREAM_ID_MAX (so in-flight requests are still
2125 // accepted), but the contract for *new* peer-
2126 // initiated streams is that they must be refused.
2127 // Without this check, a peer racing the drain
2128 // window could open arbitrary new streams between
2129 // the initial and final GOAWAY emission.
2130 if self.drain.draining {
2131 if stream_id > self.highest_peer_stream_id {
2132 self.highest_peer_stream_id = stream_id;
2133 }
2134 return self.refuse_stream_and_discard(
2135 stream_id,
2136 H2Error::RefusedStream,
2137 header.payload_len,
2138 );
2139 }
2140 if self.streams.len()
2141 >= self.local_settings.settings_max_concurrent_streams as usize
2142 {
2143 error!(
2144 "{} MAX CONCURRENT STREAMS: limit={}, current={}",
2145 log_context!(self),
2146 self.local_settings.settings_max_concurrent_streams,
2147 self.streams.len()
2148 );
2149 // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
2150 // queueing RST_STREAM so GOAWAY reports the correct
2151 // last_stream_id if the connection closes later.
2152 if stream_id > self.highest_peer_stream_id {
2153 self.highest_peer_stream_id = stream_id;
2154 }
2155 return self.refuse_stream_and_discard(
2156 stream_id,
2157 H2Error::RefusedStream,
2158 header.payload_len,
2159 );
2160 }
2161 match self.create_stream(stream_id, context) {
2162 Some(_) => {}
2163 None => {
2164 // Buffer pool exhaustion is transient — refuse
2165 // this stream but keep the connection alive so
2166 // existing streams can complete and free buffers.
2167 error!(
2168 "{} Could not create stream {}: buffer pool exhausted",
2169 log_context!(self),
2170 stream_id
2171 );
2172 // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
2173 // queueing RST_STREAM so GOAWAY reports the correct
2174 // last_stream_id if the connection closes later.
2175 if stream_id > self.highest_peer_stream_id {
2176 self.highest_peer_stream_id = stream_id;
2177 }
2178 return self.refuse_stream_and_discard(
2179 stream_id,
2180 H2Error::RefusedStream,
2181 header.payload_len,
2182 );
2183 }
2184 }
2185 } else if header.frame_type != FrameType::Priority {
2186 // Distinguish closed vs idle: check whether the stream
2187 // was previously opened. For Server position, compare
2188 // against highest_peer_stream_id (client-initiated).
2189 // For Client position, compare against last_stream_id
2190 // (our own initiated streams) since the peer never
2191 // initiates streams on a backend connection.
2192 let is_closed_stream = if self.position.is_server() {
2193 header.stream_id <= self.highest_peer_stream_id
2194 } else {
2195 header.stream_id < self.last_stream_id
2196 };
2197 if is_closed_stream {
2198 match header.frame_type {
2199 FrameType::RstStream | FrameType::WindowUpdate => {
2200 // RFC 9113 §5.1: RST_STREAM and WINDOW_UPDATE
2201 // on a closed stream can arrive due to race
2202 // conditions and should be consumed/discarded.
2203 debug!(
2204 "{} Ignoring {:?} on closed stream {}",
2205 log_context!(self),
2206 header.frame_type,
2207 header.stream_id
2208 );
2209 self.flood_detector.glitch_count += 1;
2210 check_flood_or_return!(self);
2211 }
2212 FrameType::Data => {
2213 // RFC 9113 §5.1: DATA on a closed stream is a
2214 // stream error of type STREAM_CLOSED. Queue
2215 // RST_STREAM (not GOAWAY) to preserve the
2216 // connection for other streams. The payload is
2217 // still routed through stream 0 so handle_frame
2218 // can do connection-level flow control accounting.
2219 debug!(
2220 "{} DATA on closed stream {}, sending RST_STREAM(STREAM_CLOSED)",
2221 log_context!(self),
2222 header.stream_id
2223 );
2224 self.flood_detector.glitch_count += 1;
2225 check_flood_or_return!(self);
2226 if let Some(result) =
2227 self.enqueue_rst(header.stream_id, H2Error::StreamClosed)
2228 {
2229 return result;
2230 }
2231 }
2232 _ => {
2233 // RFC 9113 §5.1: HEADERS or other frames on a
2234 // closed stream → connection error STREAM_CLOSED.
2235 error!(
2236 "{} Received {:?} on closed stream {}, sending GOAWAY(STREAM_CLOSED)",
2237 log_context!(self),
2238 header.frame_type,
2239 header.stream_id
2240 );
2241 return self.goaway(H2Error::StreamClosed);
2242 }
2243 }
2244 } else {
2245 error!(
2246 "{} Received {:?} on idle stream {}, sending GOAWAY(PROTOCOL_ERROR)",
2247 log_context!(self),
2248 header.frame_type,
2249 header.stream_id
2250 );
2251 return self.goaway(H2Error::ProtocolError);
2252 }
2253 }
2254 H2StreamId::Zero
2255 };
2256 trace!(
2257 "{} {} {:?} {:#?}",
2258 log_context!(self),
2259 header.stream_id,
2260 stream_id,
2261 self.streams
2262 );
2263 self.expect_read = Some((read_stream, header.payload_len as usize));
2264 self.state = H2State::Frame(header);
2265 }
2266 Err(error) => {
2267 let error = error_nom_to_h2(error);
2268 error!("{} COULD NOT PARSE FRAME HEADER", log_context!(self));
2269 return self.goaway(error);
2270 }
2271 };
2272 MuxResult::Continue
2273 }
2274
2275 /// Process the `H2State::ContinuationHeader` state: parse a CONTINUATION
2276 /// frame header from `self.zero`, validate stream ID continuity, track
2277 /// flood detection counters, and transition to `ContinuationFrame`.
2278 ///
2279 /// The `headers` parameter is the accumulated HEADERS context from the
2280 /// initial HEADERS frame (cloned from the state enum to avoid borrow
2281 /// conflicts).
2282 fn handle_continuation_header_state(&mut self, headers: &Headers) -> MuxResult {
2283 let i = self.zero.storage.unparsed_data();
2284 trace!("{} continuation header: {:?}", log_context!(self), i);
2285 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2286 Ok((
2287 _,
2288 FrameHeader {
2289 payload_len,
2290 frame_type: FrameType::Continuation,
2291 flags,
2292 stream_id,
2293 },
2294 )) => {
2295 if self.zero.storage.end < 9 {
2296 error!(
2297 "{} CONTINUATION header: storage.end ({}) too small to remove frame header",
2298 log_context!(self),
2299 self.zero.storage.end
2300 );
2301 return self.goaway(H2Error::InternalError);
2302 }
2303 self.zero.storage.end -= 9;
2304 if stream_id != headers.stream_id {
2305 error!(
2306 "{} CONTINUATION stream_id {} does not match HEADERS stream_id {}",
2307 log_context!(self),
2308 stream_id,
2309 headers.stream_id
2310 );
2311 return self.goaway(H2Error::ProtocolError);
2312 }
2313 // CVE-2024-27316: track CONTINUATION frame count and accumulated size
2314 let cont_count_before = self.flood_detector.continuation_count;
2315 let acc_size_before = self.flood_detector.accumulated_header_size;
2316 self.flood_detector.continuation_count += 1;
2317 self.flood_detector.accumulated_header_size = self
2318 .flood_detector
2319 .accumulated_header_size
2320 .saturating_add(payload_len);
2321 // Per-block CONTINUATION accounting must grow monotonically
2322 // within a header block: each frame bumps the count by one and
2323 // the accumulated size by the frame's payload (never shrinks
2324 // mid-block). `reset_continuation` is the only thing allowed to
2325 // zero these — and only once the block is complete.
2326 debug_assert_eq!(
2327 self.flood_detector.continuation_count,
2328 cont_count_before + 1,
2329 "CONTINUATION per-block counter must advance by one per frame"
2330 );
2331 debug_assert!(
2332 self.flood_detector.accumulated_header_size >= acc_size_before,
2333 "accumulated header size must not shrink within a header block"
2334 );
2335 check_flood_or_return!(self);
2336 // RFC 9113 §10.5.1: reject header blocks that cannot be
2337 // buffered. Previously we silently removed READABLE interest
2338 // when amount > available_space, stalling the connection.
2339 // If the payload still fits in our zero buffer we can refuse
2340 // just this stream (RST_STREAM + drain); if not, the
2341 // connection can no longer decode header blocks safely and we
2342 // escalate to GOAWAY(EnhanceYourCalm).
2343 if self.flood_detector.accumulated_header_size
2344 > self.flood_detector.config.max_header_list_size
2345 {
2346 error!(
2347 "{} CONTINUATION accumulated header size {} exceeds {}",
2348 log_context!(self),
2349 self.flood_detector.accumulated_header_size,
2350 self.flood_detector.config.max_header_list_size
2351 );
2352 if (payload_len as usize) > self.zero.storage.available_space() {
2353 return self.goaway(H2Error::EnhanceYourCalm);
2354 }
2355 // Remove the already-created stream slot before refusing,
2356 // so it does not leak against MAX_CONCURRENT_STREAMS. Route
2357 // through `remove_dead_stream` so the expect_write/read
2358 // invariant (§LIFECYCLE.md 5.4) holds on this path too.
2359 if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
2360 self.remove_dead_stream(stream_id, global_stream_id);
2361 }
2362 return self.refuse_stream_and_discard(
2363 stream_id,
2364 H2Error::RefusedStream,
2365 payload_len,
2366 );
2367 }
2368 if (payload_len as usize) > self.zero.storage.available_space() {
2369 error!(
2370 "{} CONTINUATION payload {} exceeds buffer space {}",
2371 log_context!(self),
2372 payload_len,
2373 self.zero.storage.available_space()
2374 );
2375 return self.goaway(H2Error::EnhanceYourCalm);
2376 }
2377 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2378 let mut headers = headers.clone();
2379 headers.end_headers = flags & parser::FLAG_END_HEADERS != 0;
2380 headers.header_block_fragment.len = headers
2381 .header_block_fragment
2382 .len
2383 .saturating_add(payload_len);
2384 self.state = H2State::ContinuationFrame(headers);
2385 }
2386 Err(error) => {
2387 let error = error_nom_to_h2(error);
2388 error!("{} COULD NOT PARSE CONTINUATION HEADER", log_context!(self));
2389 return self.goaway(error);
2390 }
2391 other => {
2392 error!(
2393 "{} UNEXPECTED {:?} WHILE PARSING CONTINUATION HEADER",
2394 log_context!(self),
2395 other
2396 );
2397 return self.goaway(H2Error::ProtocolError);
2398 }
2399 };
2400 MuxResult::Continue
2401 }
2402
2403 pub fn readable<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2404 where
2405 E: Endpoint,
2406 L: ListenerHandler + L7ListenerHandler,
2407 {
2408 self.prune_inactive_streams_while_closing(context);
2409 // Pass 4 Medium #3: per-stream idle guard. Slow-multiplex Slowloris
2410 // sends one byte or a control frame per stream just often enough to
2411 // reset the connection-level timer; per-stream deadlines catch it.
2412 self.cancel_timed_out_streams(context, &mut endpoint);
2413
2414 // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
2415 if let Some(sent_at) = self.settings_sent_at {
2416 if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
2417 warn!(
2418 "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
2419 log_context!(self),
2420 SETTINGS_ACK_TIMEOUT
2421 );
2422 return self.goaway(H2Error::SettingsTimeout);
2423 }
2424 }
2425
2426 // Don't reset the timeout unconditionally here. Only application data
2427 // (DATA/HEADERS frames) should reset the timeout. H2 control frames
2428 // (PING, WINDOW_UPDATE, SETTINGS) must NOT reset it, otherwise a peer
2429 // sending periodic PINGs prevents timeout detection on stuck sessions.
2430 // The timeout is reset:
2431 // - Below, when reading DATA payload (H2StreamId::Other)
2432 // - In handle_frame(), when processing HEADERS frames
2433 let (stream_id, kawa) = if let Some((stream_id, amount)) = self.expect_read {
2434 let (kawa, did) = match stream_id {
2435 H2StreamId::Zero => (&mut self.zero, usize::MAX),
2436 H2StreamId::Other {
2437 gid: global_stream_id,
2438 ..
2439 } => {
2440 // Reading DATA frame payload for an application stream.
2441 // This is real application activity — reset the timeout.
2442 self.timeout_container.reset();
2443 (
2444 context.streams[global_stream_id]
2445 .split(&self.position)
2446 .rbuffer,
2447 global_stream_id,
2448 )
2449 }
2450 };
2451 trace!(
2452 "{} {:?}({:?}, {})",
2453 log_context!(self),
2454 self.state,
2455 stream_id,
2456 amount
2457 );
2458 if amount > 0 {
2459 if amount > kawa.storage.available_space() {
2460 self.readiness.interest.remove(Ready::READABLE);
2461 return MuxResult::Continue;
2462 }
2463 let (size, status) = self.socket.socket_read(&mut kawa.storage.space()[..amount]);
2464 context.debug.push(DebugEvent::SocketIO(0, did, size));
2465 kawa.storage.fill(size);
2466 self.position.count_bytes_in_counter(size);
2467 self.bytes.zero_bytes_read += size;
2468 if update_readiness_after_read(size, status, &mut self.readiness) {
2469 if matches!(self.position, Position::Server)
2470 && self.drain.draining
2471 && matches!(status, SocketResult::Closed | SocketResult::Error)
2472 {
2473 // During graceful drain, a frontend EOF/HUP means no
2474 // further frame headers or payload bytes can arrive.
2475 // Keeping expect_read here strands the connection in
2476 // Header/Frame forever even after the peer is gone.
2477 self.expect_read = None;
2478 }
2479 return MuxResult::Continue;
2480 } else if size == amount {
2481 self.expect_read = None;
2482 } else {
2483 self.expect_read = Some((stream_id, amount - size));
2484 if let (H2State::ClientPreface, Position::Server) =
2485 (&self.state, &self.position)
2486 {
2487 let i = kawa.storage.data();
2488 if !b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n".starts_with(i) {
2489 debug!("{} EARLY INVALID PREFACE: {:?}", log_context!(self), i);
2490 return self.force_disconnect();
2491 }
2492 }
2493 return MuxResult::Continue;
2494 }
2495 } else {
2496 self.expect_read = None;
2497 }
2498 (stream_id, kawa)
2499 } else {
2500 self.readiness.event.remove(Ready::READABLE);
2501 return MuxResult::Continue;
2502 };
2503 match (&self.state, &self.position) {
2504 (H2State::Error, _)
2505 | (H2State::GoAway, _)
2506 | (H2State::ServerSettings, Position::Server)
2507 | (H2State::ClientPreface, Position::Client(..))
2508 | (H2State::ClientSettings, Position::Client(..)) => {
2509 error!(
2510 "{} Unexpected combination: (Readable, {:?}, {:?})",
2511 log_context!(self),
2512 self.state,
2513 self.position
2514 );
2515 return self.force_disconnect();
2516 }
2517 (H2State::Discard, _) => {
2518 let _i = kawa.storage.data();
2519 trace!("{} DISCARDING: {:?}", log_context!(self), _i);
2520 kawa.storage.clear();
2521 self.attribute_bytes_to_overhead();
2522 self.expect_header();
2523 }
2524 (H2State::ClientPreface, Position::Server) => {
2525 let i = kawa.storage.data();
2526 let i = match parser::preface(i) {
2527 Ok((i, _)) => i,
2528 Err(_) => return self.force_disconnect(),
2529 };
2530 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2531 Ok((
2532 _,
2533 FrameHeader {
2534 payload_len,
2535 frame_type: FrameType::Settings,
2536 flags: 0,
2537 stream_id: 0,
2538 },
2539 )) => {
2540 kawa.storage.clear();
2541 self.state = H2State::ClientSettings;
2542 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2543 }
2544 _ => return self.force_disconnect(),
2545 };
2546 }
2547 (H2State::ClientSettings, Position::Server) => {
2548 let i = kawa.storage.data();
2549 let settings = match parser::settings_frame(
2550 i,
2551 &FrameHeader {
2552 payload_len: i.len() as u32,
2553 frame_type: FrameType::Settings,
2554 flags: 0,
2555 stream_id: 0,
2556 },
2557 ) {
2558 Ok((_, settings)) => {
2559 kawa.storage.clear();
2560 settings
2561 }
2562 Err(_) => return self.force_disconnect(),
2563 };
2564 let kawa = &mut self.zero;
2565 match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
2566 Ok((_, size)) => {
2567 kawa.storage.fill(size);
2568 incr!(names::h2::FRAMES_TX_SETTINGS);
2569 // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
2570 self.settings_sent_at = Some(Instant::now());
2571 }
2572 Err(error) => {
2573 error!(
2574 "{} Could not serialize SettingsFrame: {:?}",
2575 log_context!(self),
2576 error
2577 );
2578 return self.force_disconnect();
2579 }
2580 };
2581
2582 self.state = H2State::ServerSettings;
2583 self.expect_write = Some(H2StreamId::Zero);
2584 self.readiness.signal_pending_write();
2585 return self.handle_frame(settings, 0, context, endpoint);
2586 }
2587 (H2State::ServerSettings, Position::Client(..)) => {
2588 let i = kawa.storage.data();
2589 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2590 Ok((
2591 _,
2592 header @ FrameHeader {
2593 payload_len,
2594 frame_type: FrameType::Settings,
2595 flags: 0,
2596 stream_id: 0,
2597 },
2598 )) => {
2599 kawa.storage.clear();
2600 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2601 self.state = H2State::Frame(header)
2602 }
2603 _ => return self.force_disconnect(),
2604 };
2605 }
2606 (H2State::Header, _) => {
2607 return self.handle_header_state(context);
2608 }
2609 (H2State::ContinuationHeader(headers), _) => {
2610 let headers = headers.clone();
2611 return self.handle_continuation_header_state(&headers);
2612 }
2613 (H2State::Frame(header), _) => {
2614 let i = kawa.storage.unparsed_data();
2615 trace!("{} data: {:?}", log_context!(self), i);
2616 let wire_payload_len = header.payload_len;
2617 let frame = match parser::frame_body(i, header) {
2618 Ok((_, frame)) => frame,
2619 Err(error) => {
2620 let error = error_nom_to_h2(error);
2621 error!("{} COULD NOT PARSE FRAME BODY", log_context!(self));
2622 return self.goaway(error);
2623 }
2624 };
2625 if let H2StreamId::Zero = stream_id {
2626 if header.frame_type == FrameType::Headers {
2627 kawa.storage.head = kawa.storage.end;
2628 } else {
2629 kawa.storage.end = kawa.storage.head;
2630 }
2631 }
2632 self.expect_header();
2633 return self.handle_frame(frame, wire_payload_len, context, endpoint);
2634 }
2635 (H2State::ContinuationFrame(headers), _) => {
2636 kawa.storage.head = kawa.storage.end;
2637 let i = kawa.storage.data();
2638 trace!("{} data: {:?}", log_context!(self), i);
2639 let headers = headers.clone();
2640 self.expect_header();
2641 return self.handle_frame(Frame::Headers(headers), 0, context, endpoint);
2642 }
2643 }
2644 MuxResult::Continue
2645 }
2646
2647 /// Update the H2 connection-level *aggregate* gauges with this connection's
2648 /// current contribution, expressed as a signed delta against the last
2649 /// snapshot we emitted.
2650 ///
2651 /// The three metrics are emitted via [`gauge_add!`] (lifecycle deltas) so
2652 /// that the dashboard sees the **sum across all live H2 connections**:
2653 ///
2654 /// - `h2.connection.window_bytes` — sum of available connection-level
2655 /// send-window bytes. Negative per-connection windows clamp to 0 so the
2656 /// aggregate represents only available capacity, not deficit.
2657 /// - `h2.connection.active_streams` — sum of in-flight streams across
2658 /// every H2 connection.
2659 /// - `h2.connection.pending_window_updates` — sum of queued (un-flushed)
2660 /// per-stream WINDOW_UPDATE entries across every H2 connection.
2661 ///
2662 /// Called from the write hot path; emits nothing when the snapshot is
2663 /// unchanged so the steady state stays cheap. The paired decrement for
2664 /// every increment is provided by [`Drop`], which subtracts the final
2665 /// snapshot when the connection is dropped — keeping the aggregate
2666 /// arithmetically symmetric independent of which close path runs
2667 /// (`graceful_goaway`, `force_disconnect`, `handle_goaway_frame`,
2668 /// `Mux::close`, panic-unwind, …).
2669 fn gauge_connection_state(&mut self) {
2670 let snapshot = (
2671 self.flow_control.window.max(0) as usize,
2672 self.streams.len(),
2673 self.flow_control.pending_window_updates.len(),
2674 );
2675 if self.last_gauge_snapshot == Some(snapshot) {
2676 return;
2677 }
2678 let prev = self.last_gauge_snapshot.unwrap_or((0, 0, 0));
2679 // Diff in i64 — usize cannot represent the negative side of the delta.
2680 let dw = snapshot.0 as i64 - prev.0 as i64;
2681 let ds = snapshot.1 as i64 - prev.1 as i64;
2682 let du = snapshot.2 as i64 - prev.2 as i64;
2683 if dw != 0 {
2684 gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, dw);
2685 }
2686 if ds != 0 {
2687 gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, ds);
2688 }
2689 if du != 0 {
2690 gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, du);
2691 }
2692 self.last_gauge_snapshot = Some(snapshot);
2693 }
2694
2695 /// Subtract this connection's contribution from the three aggregate
2696 /// `h2.connection.*` gauges. Idempotent: clears `last_gauge_snapshot` so a
2697 /// second call (or a [`Drop`] on top of an explicit reset) is a no-op.
2698 ///
2699 /// Pairs with every prior call to [`Self::gauge_connection_state`]; called
2700 /// from [`Drop`] so the symmetry is guaranteed regardless of the close
2701 /// path.
2702 fn release_connection_gauges(&mut self) {
2703 if let Some((w, s, u)) = self.last_gauge_snapshot.take() {
2704 if w != 0 {
2705 gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, -(w as i64));
2706 }
2707 if s != 0 {
2708 gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, -(s as i64));
2709 }
2710 if u != 0 {
2711 gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, -(u as i64));
2712 }
2713 }
2714 }
2715
2716 /// Write application data (request/response bodies, headers) across all
2717 /// active streams, respecting priority ordering and flow control.
2718 ///
2719 /// This is the main data-plane write path: it resumes any partially-written
2720 /// stream, prepares new frames via the H2 block converter, flushes them to
2721 /// the socket, and recycles completed streams.
2722 ///
2723 /// NOTE: The priority iteration loop and converter setup remain inline here
2724 /// because the converter borrows `self.encoder`, preventing further
2725 /// decomposition into `&mut self` methods within the loop body.
2726 fn write_streams<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2727 where
2728 E: Endpoint,
2729 L: ListenerHandler + L7ListenerHandler,
2730 {
2731 self.timeout_container.reset();
2732 // Pre-compute byte totals for proportional overhead distribution.
2733 let byte_totals = self.compute_stream_byte_totals(context);
2734 let mut io_slices: Vec<IoSlice<'static>> = Vec::new();
2735
2736 if let Some(
2737 write_stream @ H2StreamId::Other {
2738 id: stream_id,
2739 gid: global_stream_id,
2740 },
2741 ) = self.expect_write
2742 {
2743 let stream = &mut context.streams[global_stream_id];
2744 let stream_state = stream.state;
2745 let parts = stream.split(&self.position);
2746 let kawa = parts.wbuffer;
2747 // Resume path: if the same stream is parked waiting for buffer
2748 // space (expect_read matches write_stream), pass the amount so
2749 // flush_stream_out can re-enable READABLE as soon as we drain.
2750 let cross_read_amount = match self.expect_read {
2751 Some((read_stream, amount)) if write_stream == read_stream => Some(amount),
2752 _ => None,
2753 };
2754 let mut resume_bytes: usize = 0;
2755 let outcome = Self::flush_stream_out(
2756 &mut self.socket,
2757 kawa,
2758 parts.metrics,
2759 &self.position,
2760 &mut self.readiness,
2761 &mut context.debug,
2762 2,
2763 global_stream_id,
2764 None,
2765 cross_read_amount,
2766 &mut io_slices,
2767 Some(&mut resume_bytes),
2768 );
2769 // Refresh the per-stream idle timer when outbound bytes move: a
2770 // large response delivered at low bandwidth is "active", not idle,
2771 // even when the peer sends no inbound frames.
2772 if resume_bytes > 0 {
2773 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2774 *t = Instant::now();
2775 }
2776 // Clear the flow-control-stall deadline ONLY when the effective
2777 // send window is genuinely open — that alone is a real un-stall.
2778 // A window-stalled stream can flush a `WINDOW_UPDATE(+1)`-drip
2779 // byte HERE via socket-backpressure resume; clearing on that
2780 // would reset the deadline at 1-byte granularity and re-open the
2781 // drip the M2 cumulative-stall budget closes. While still blocked,
2782 // leave the deadline (and its progress accumulator) for the main
2783 // write loop's budget to govern — keeping the two maps in lockstep.
2784 if min(*parts.window, self.flow_control.window) > 0 {
2785 self.stream_fc_stalled_since.remove(&stream_id);
2786 self.stream_fc_stalled_progress.remove(&stream_id);
2787 }
2788 }
2789 if outcome == FlushOutcome::Stalled {
2790 return MuxResult::Continue;
2791 }
2792 self.expect_write = None;
2793 if (kawa.is_terminated() || kawa.is_error())
2794 && kawa.is_completed()
2795 && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2796 {
2797 let (client_rtt, server_rtt) = Self::snapshot_rtts(
2798 &self.position,
2799 &self.socket,
2800 &endpoint,
2801 stream.linked_token(),
2802 );
2803
2804 if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2805 &self.position,
2806 &mut self.bytes,
2807 &self.streams,
2808 stream,
2809 global_stream_id,
2810 stream_id,
2811 byte_totals,
2812 &mut context.debug,
2813 context.listener.clone(),
2814 client_rtt,
2815 server_rtt,
2816 ) {
2817 // Remove the recycled stream from the connection maps
2818 // before endpoint.end_stream() can trigger teardown.
2819 // Otherwise session close can observe a stale `Recycle`
2820 // entry in self.streams and mis-handle the connection as
2821 // if it still had an active H2 stream.
2822 self.remove_dead_stream(dead_id, global_stream_id);
2823 if let Some(token) = token {
2824 remove_backend_stream(
2825 &mut context.backend_streams,
2826 token,
2827 global_stream_id,
2828 );
2829 endpoint.end_stream(token, global_stream_id, context);
2830 }
2831 }
2832 }
2833 }
2834
2835 self.gauge_connection_state();
2836
2837 let scheme: &'static [u8] = if context.listener.borrow().protocol() == Protocol::HTTPS {
2838 b"https"
2839 } else {
2840 b"http"
2841 };
2842 let mut completed_streams = Vec::new();
2843 let mut converter_buf = std::mem::take(&mut self.converter_buf);
2844 converter_buf.clear();
2845 let mut converter = converter::H2BlockConverter {
2846 max_frame_size: self.peer_settings.settings_max_frame_size as usize,
2847 window: 0,
2848 stream_id: 0,
2849 encoder: &mut self.encoder,
2850 out: converter_buf,
2851 scheme,
2852 lowercase_buf: std::mem::take(&mut self.lowercase_buf),
2853 cookie_buf: std::mem::take(&mut self.cookie_buf),
2854 // When this connection is a backend client we are writing
2855 // toward the upstream backend — flow-control stalls in that
2856 // direction are scoped to `backend.flow_control.paused` (in
2857 // addition to the existing direction-agnostic
2858 // `h2.flow_control_stall`).
2859 position_is_client: self.position.is_client(),
2860 // RFC 9218 §4: toggled per-stream in the loop below, driven by
2861 // `Prioriser::get(stream_id).1`. Non-incremental by default so
2862 // unit tests and non-scheduled callers (e.g. the resume path
2863 // above) keep the sequential semantics.
2864 incremental_mode: false,
2865 // Populated once per write pass from `apply_incremental_rotation`
2866 // below. The converter uses `incremental_peer_count <= 1` to skip
2867 // the RFC 9218 yield-after-one-DATA behaviour when there is no
2868 // peer to interleave with (solo-bucket fast path).
2869 incremental_peer_count: 0,
2870 // RFC 7541 §6.3: move the pending size-update onto the converter
2871 // so the first header block of this pass prepends the signal.
2872 // We clear the connection-side mirror only AFTER the write pass
2873 // confirms emission via `converter.size_update_emitted`, so a
2874 // DATA-only write pass (no header block) does not drop the
2875 // signal.
2876 pending_table_size_update: self.pending_table_size_update,
2877 size_update_emitted: false,
2878 // Reset on every write pass; `check_header_capacity` flips it
2879 // mid-call and `finalize` commits the abort by flipping
2880 // `kawa.parsing_phase` to Error so the next pass emits
2881 // RST_STREAM(InternalError).
2882 pending_oversized_abort: false,
2883 };
2884 self.priorities_buf.clear();
2885 self.priorities_buf.extend(self.streams.keys().copied());
2886 // RFC 9218 §4 primary sort: ascending urgency, then stream ID for
2887 // stability. The incremental flag is handled by
2888 // `apply_incremental_rotation` below so it does not perturb the
2889 // non-incremental fast path.
2890 self.priorities_buf.sort_by_cached_key(|id| {
2891 let (urgency, _) = self.prioriser.get(id);
2892 (urgency, *id)
2893 });
2894 // RFC 9218 §4: inside each urgency bucket, move incremental streams
2895 // to the tail and rotate them by the per-connection round-robin
2896 // cursor so no single slow-draining stream can starve its
2897 // same-urgency incremental peers.
2898 let incremental_count = self
2899 .prioriser
2900 .apply_incremental_rotation(&mut self.priorities_buf);
2901
2902 // RFC 9218 §4 refinement (Tier 3a): the connection-global
2903 // `incremental_count` is too coarse for `converter.incremental_peer_count`.
2904 // A solo `u=0, i` stream with an unrelated `u=7, i` peer in a
2905 // different urgency bucket would still see `incremental_peer_count > 1`
2906 // and voluntarily yield — stranding bytes the invariant-15/16 guards
2907 // were meant to prevent. Scope the count to same-urgency streams that
2908 // are actually ready to emit this pass (eligibility mirrors the check
2909 // in the write loop below).
2910 let mut ready_incremental_by_urgency: HashMap<u8, usize> = HashMap::new();
2911 for &sid in self.priorities_buf.iter() {
2912 let (urgency, is_incremental) = self.prioriser.get(&sid);
2913 if !is_incremental {
2914 continue;
2915 }
2916 let Some(&gid) = self.streams.get(&sid) else {
2917 continue;
2918 };
2919 let wbuffer = match self.position {
2920 Position::Server => &context.streams[gid].back,
2921 Position::Client(..) => &context.streams[gid].front,
2922 };
2923 if wbuffer.is_main_phase()
2924 || (wbuffer.is_terminated() && !wbuffer.is_completed())
2925 || (wbuffer.is_error() && !self.rst_sent.contains(&sid))
2926 {
2927 *ready_incremental_by_urgency.entry(urgency).or_insert(0) += 1;
2928 }
2929 }
2930
2931 trace!(
2932 "{} PRIORITIES: {:?} (incremental_count={}, per_bucket={:?})",
2933 log_context!(self),
2934 self.priorities_buf,
2935 incremental_count,
2936 ready_incremental_by_urgency
2937 );
2938 let mut socket_write = false;
2939 // RFC 9218 §4 round-robin: remember the first incremental stream we
2940 // served this pass so we can advance `Prioriser::incremental_cursor`
2941 // to it, causing the next pass to start with the stream just after.
2942 let mut first_incremental_fired: Option<StreamId> = None;
2943 // Total outbound bytes emitted across all stream flushes this pass —
2944 // `finalize_write` uses this to distinguish a voluntary scheduler
2945 // yield (progress + pending back-buffer, LIFECYCLE §9 invariant 16)
2946 // from a no-progress wait state (e.g. flow-control starvation).
2947 let mut total_bytes_written: usize = 0;
2948 // Collect every fresh RST_STREAM emitted via the converter
2949 // (`initialize` chokepoint or the HPACK over-budget abort path)
2950 // so we can run `account_emitted_rst` for each one AFTER the
2951 // converter is dropped — the converter holds `&mut self.encoder`
2952 // for the loop body so we cannot take `&mut self` until then.
2953 let mut freshly_emitted_rsts: Vec<H2Error> = Vec::new();
2954 'outer: for idx in 0..self.priorities_buf.len() {
2955 let stream_id = self.priorities_buf[idx];
2956 let Some(&global_stream_id) = self.streams.get(&stream_id) else {
2957 error!(
2958 "{} stream_id {} from sorted keys missing in streams map",
2959 log_context!(self),
2960 stream_id
2961 );
2962 continue;
2963 };
2964 let (urgency, is_incremental) = self.prioriser.get(&stream_id);
2965 let stream = &mut context.streams[global_stream_id];
2966 let stream_state = stream.state;
2967 let parts = stream.split(&self.position);
2968 let kawa = parts.wbuffer;
2969 // Hoisted out of the gate below so the post-flush flow-control-stall
2970 // classification can see how many flow-control bytes this pass moved.
2971 let mut consumed: i32 = 0;
2972 if kawa.is_main_phase()
2973 || (kawa.is_terminated() && !kawa.is_completed())
2974 || (kawa.is_error() && !self.rst_sent.contains(&stream_id))
2975 {
2976 let window = min(*parts.window, self.flow_control.window);
2977 converter.window = window;
2978 converter.stream_id = stream_id;
2979 // RFC 9218 §4: incremental streams yield the converter after
2980 // a single DATA frame so same-urgency peers interleave.
2981 converter.incremental_mode = is_incremental;
2982 // Same-urgency-bucket ready-peer count (Tier 3a, LIFECYCLE §9
2983 // invariant 17). The converter skips the yield when there is
2984 // no peer in the same bucket to interleave with — prevents
2985 // the `finalize_write` WRITABLE-withdrawal strand (see
2986 // `test_h2_solo_incremental_drains_fully`). A connection-wide
2987 // count would wrongly yield for a solo incremental stream
2988 // when another urgency bucket happens to contain an
2989 // incremental peer.
2990 converter.incremental_peer_count = ready_incremental_by_urgency
2991 .get(&urgency)
2992 .copied()
2993 .unwrap_or(0);
2994 // Track RST_STREAM dedup: if kawa is in error state, the converter
2995 // will generate a RST_STREAM frame via `initialize`. Mark it so we
2996 // don't send a duplicate on the next writable cycle.
2997 if kawa.is_error() {
2998 let freshly_rst = self.rst_sent.insert(stream_id);
2999 // LIFECYCLE §9 invariant 17: any transition to ineligible
3000 // mid-pass MUST decrement ready_incremental_by_urgency so
3001 // later streams in the same 'outer iteration see the live
3002 // count, not the snapshot. Missing this costs one voluntary
3003 // yield per same-urgency peer that trails the RST.
3004 if freshly_rst && is_incremental {
3005 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3006 *c = c.saturating_sub(1);
3007 }
3008 }
3009 // Account for the RST that `initialize` is about to emit
3010 // for this stream. Without this the MadeYouReset lifetime
3011 // cap is evadable: any path that flips `parsing_phase` to
3012 // Error before reaching this gate (oversized inbound
3013 // trailers, malformed bodies, etc.) would land an
3014 // unaccounted RST on the wire. We defer the actual
3015 // accounting call until after `drop(converter)` — the
3016 // converter holds `&mut self.encoder` here.
3017 if freshly_rst {
3018 freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
3019 }
3020 }
3021 // Apply per-frontend response-side header edits
3022 // (set/replace/delete) stashed by the routing layer at
3023 // request time. H2 frontends always run as Server
3024 // position; the back-side H2 client (when sozu speaks
3025 // H2 to a backend) is a request emission and was
3026 // already mutated by Router::route_from_request.
3027 //
3028 // The snapshot is **drained** via `mem::take` so the
3029 // injection runs exactly once per response. Without
3030 // this, a re-entry of `write_streams` for the same
3031 // stream (multi-frame body, flow-control yield, or
3032 // RFC 9218 same-urgency round-robin) would re-call
3033 // `apply_response_header_edits` after `kawa.prepare`
3034 // had already consumed the `Block::Flags{end_header}`
3035 // anchor — the helper falls back to
3036 // `kawa.blocks.len()` and appends the edit AFTER all
3037 // remaining DATA blocks. The next prepare cycle then
3038 // encodes that orphan `Block::Header` into
3039 // `H2BlockConverter.out` with no closing
3040 // `Block::Flags{end_header}` to flush it as a HEADERS
3041 // frame, and `H2BlockConverter::finalize` trips the
3042 // "out buffer not empty (38 bytes remaining), clearing"
3043 // defense-in-depth log on every re-entry. 38 bytes is
3044 // the static-table HPACK encoding of a typical HSTS
3045 // header, which is how the symptom surfaces in
3046 // production once the listener-default HSTS reaches a
3047 // non-trivial share of frontends.
3048 if matches!(self.position, super::Position::Server)
3049 && !parts.context.headers_response.is_empty()
3050 {
3051 let edits = std::mem::take(&mut parts.context.headers_response);
3052 super::shared::apply_response_header_edits(kawa, &edits);
3053 }
3054 kawa.prepare(&mut converter);
3055 // The pre-prepare gate at line 2483 only inserts into
3056 // `rst_sent` when `kawa.is_error()` is already true on
3057 // entry. The HPACK over-budget abort path
3058 // (`H2BlockConverter::check_header_capacity` →
3059 // `finalize`) flips `parsing_phase` to Error AND pushes
3060 // its own RST_STREAM frame inside this same prepare
3061 // pass; without a post-prepare insert here the next
3062 // writable cycle would gate-pass and double-emit a
3063 // RST_STREAM via the existing `initialize` chokepoint.
3064 //
3065 // Per Codex P2: the converter's direct RST emission
3066 // bypasses the metric/flood accounting that
3067 // `Self::reset_stream` performs. Mirror it here so a
3068 // peer that drives oversized headers across many
3069 // streams cannot escape the MadeYouReset emitted-RST
3070 // lifetime cap and so dashboards see the per-error
3071 // counter and the global tx counter.
3072 //
3073 // Per Codex P3: when an incremental stream flips to
3074 // Error mid-prepare, the RFC 9218 §4 yield-after-one
3075 // accounting must drop this stream from the
3076 // same-urgency ready bucket so trailing peers see the
3077 // live count.
3078 let freshly_rst_post_prepare = kawa.is_error() && self.rst_sent.insert(stream_id);
3079 if freshly_rst_post_prepare {
3080 // Defer accounting until after `drop(converter)`; same
3081 // reason as the pre-prepare collector above.
3082 freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
3083 if is_incremental {
3084 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3085 *c = c.saturating_sub(1);
3086 }
3087 }
3088 }
3089 consumed = window - converter.window;
3090 *parts.window = parts.window.saturating_sub(consumed);
3091 self.flow_control.window = self.flow_control.window.saturating_sub(consumed);
3092 if is_incremental && consumed > 0 && first_incremental_fired.is_none() {
3093 first_incremental_fired = Some(stream_id);
3094 }
3095 }
3096 context.debug.push(DebugEvent::S(
3097 stream_id,
3098 global_stream_id,
3099 kawa.parsing_phase,
3100 kawa.blocks.len(),
3101 kawa.out.len(),
3102 ));
3103 let mut stream_bytes: usize = 0;
3104 let outcome = Self::flush_stream_out(
3105 &mut self.socket,
3106 kawa,
3107 parts.metrics,
3108 &self.position,
3109 &mut self.readiness,
3110 &mut context.debug,
3111 3,
3112 global_stream_id,
3113 Some(&mut socket_write),
3114 None,
3115 &mut io_slices,
3116 Some(&mut stream_bytes),
3117 );
3118 // Refresh the per-stream idle timer on outbound bytes. Without
3119 // this, a long-running response trickled at low bandwidth would
3120 // be killed by `cancel_timed_out_streams` mid-delivery — the
3121 // inbound-only refresh at h2.rs:3887-3895 / 4026-4031 never
3122 // fires while the peer is idle.
3123 if stream_bytes > 0 {
3124 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
3125 *t = Instant::now();
3126 }
3127 }
3128 // Arm/age the dedicated flow-control-stall deadline that catches a
3129 // window-stalled stream — a buffered RESPONSE to a slow frontend
3130 // (`Position::Server`) OR a buffered request UPLOAD to a slow H2
3131 // backend (`Position::Client`): window-stall reaping is bidirectional
3132 // by design (M4), so there is no position gate here. Set only when the
3133 // stream holds sendable buffered data it cannot send because its
3134 // effective send window is exhausted; unlike `stream_last_activity_at`
3135 // it is NEVER refreshed by inbound DATA/HEADERS, so a peer dribbling
3136 // 1-byte DATA cannot keep it warm.
3137 //
3138 // M2 cumulative-stall budget: a genuinely OPEN window clears the
3139 // deadline immediately (real un-stall). While the window stays
3140 // blocked, accumulate this pass's outbound drain; only cumulative
3141 // progress reaching `FC_STALL_CLEAR_FLOOR` (a full frame of real
3142 // delivery) clears it. A `WINDOW_UPDATE(+1)` drip drains ~1 byte/pass
3143 // straight back to a zero window, so it never reaches the floor — the
3144 // deadline ages out and `cancel_timed_out_streams` RST(CANCEL)s the
3145 // slot-pinning stream after `stream_idle_timeout`.
3146 let outbound_window_blocked = has_sendable_response(kawa)
3147 && min(*parts.window, self.flow_control.window) <= 0
3148 && (!kawa.blocks.is_empty() || !kawa.out.is_empty());
3149 match fc_stall_budget_decision(
3150 outbound_window_blocked,
3151 consumed,
3152 self.stream_fc_stalled_progress.get(&stream_id).copied(),
3153 ) {
3154 FcStallAction::Clear => {
3155 self.stream_fc_stalled_since.remove(&stream_id);
3156 self.stream_fc_stalled_progress.remove(&stream_id);
3157 }
3158 FcStallAction::Arm { progress } => {
3159 self.stream_fc_stalled_since
3160 .entry(stream_id)
3161 .or_insert_with(Instant::now);
3162 self.stream_fc_stalled_progress.insert(stream_id, progress);
3163 }
3164 }
3165 total_bytes_written = total_bytes_written.saturating_add(stream_bytes);
3166 if outcome == FlushOutcome::Stalled {
3167 self.expect_write = Some(H2StreamId::Other {
3168 id: stream_id,
3169 gid: global_stream_id,
3170 });
3171 break 'outer;
3172 }
3173 self.expect_write = None;
3174 if (kawa.is_terminated() || kawa.is_error())
3175 && kawa.is_completed()
3176 && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
3177 {
3178 let close_frontend =
3179 matches!(self.position, Position::Server) && !parts.context.keep_alive_frontend;
3180 let (client_rtt, server_rtt) = Self::snapshot_rtts(
3181 &self.position,
3182 &self.socket,
3183 &endpoint,
3184 stream.linked_token(),
3185 );
3186
3187 if let Some((dead_id, token)) = Self::try_recycle_server_stream(
3188 &self.position,
3189 &mut self.bytes,
3190 &self.streams,
3191 stream,
3192 global_stream_id,
3193 stream_id,
3194 byte_totals,
3195 &mut context.debug,
3196 context.listener.clone(),
3197 client_rtt,
3198 server_rtt,
3199 ) {
3200 completed_streams.push((dead_id, global_stream_id, token, close_frontend));
3201 // LIFECYCLE §9 invariant 17: decrement INSIDE 'outer so
3202 // later iterations see the reduced count. The post-loop
3203 // retirement at remove_dead_stream is too late.
3204 if is_incremental {
3205 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
3206 *c = c.saturating_sub(1);
3207 }
3208 }
3209 }
3210 }
3211 }
3212 gauge!(
3213 "h2.streams.ready_incremental.by_urgency",
3214 ready_incremental_by_urgency
3215 .values()
3216 .copied()
3217 .sum::<usize>()
3218 );
3219 // Reclaim the converter's reusable buffers before any &mut self calls,
3220 // since the converter borrows self.encoder.
3221 let converter_out = std::mem::take(&mut converter.out);
3222 let lowercase_buf = std::mem::take(&mut converter.lowercase_buf);
3223 let cookie_buf = std::mem::take(&mut converter.cookie_buf);
3224 // RFC 7541 §6.3: clear our mirror of the pending size-update only
3225 // AFTER the converter confirmed the signal was emitted to its
3226 // output buffer. A DATA-only pass leaves `size_update_emitted` as
3227 // `false` so the signal stays queued for the next pass with a
3228 // header block.
3229 let size_update_emitted = converter.size_update_emitted;
3230 drop(converter);
3231 if size_update_emitted {
3232 self.pending_table_size_update = None;
3233 }
3234 // Account every RST that the converter emitted during this pass
3235 // (pre-prepare gate + post-prepare HPACK over-budget abort) so
3236 // the global tx counter, the per-error breakdown, and the
3237 // MadeYouReset emitted-RST lifetime cap stay in step. If the
3238 // cap trips, propagate the GOAWAY result.
3239 for error in freshly_emitted_rsts {
3240 if let Some(result) = self.account_emitted_rst(error) {
3241 return result;
3242 }
3243 }
3244 self.converter_buf = converter_out;
3245 self.lowercase_buf = lowercase_buf;
3246 self.cookie_buf = cookie_buf;
3247 self.shrink_converter_buffers();
3248 // RFC 9218 §4: commit the round-robin cursor so the next writable
3249 // cycle begins with the stream immediately after the one we fired
3250 // first this pass.
3251 self.prioriser
3252 .advance_incremental_cursor(first_incremental_fired);
3253 let mut close_frontend_after_completed_stream = false;
3254 for (dead_id, global_stream_id, token, close_frontend) in completed_streams {
3255 // The main write loop borrows self.encoder, so we can't mutate the
3256 // H2 maps inline. Retire the recycled stream immediately after the
3257 // converter borrow ends, before endpoint.end_stream() can trigger
3258 // teardown and observe a stale `Recycle` entry in self.streams.
3259 self.remove_dead_stream(dead_id, global_stream_id);
3260 close_frontend_after_completed_stream |= close_frontend;
3261 if let Some(token) = token {
3262 remove_backend_stream(&mut context.backend_streams, token, global_stream_id);
3263 endpoint.end_stream(token, global_stream_id, context);
3264 }
3265 }
3266 if close_frontend_after_completed_stream && !self.drain.draining {
3267 return if self.streams.is_empty() {
3268 self.goaway(H2Error::NoError)
3269 } else {
3270 self.graceful_goaway()
3271 };
3272 }
3273 self.finalize_write(socket_write, total_bytes_written, context)
3274 }
3275
/// Push everything queued in `kawa.out` to `socket` using vectored writes.
///
/// Each loop iteration gathers the `Store` blocks at the head of `kawa.out`
/// (stopping at the first `Delimiter`) into `io_slices`, issues one
/// `socket_write_vectored` call, then consumes the written byte count from
/// `kawa` and updates the byte counters.
///
/// Parameters:
/// - `socket`: destination of the vectored write.
/// - `kawa`: stream buffer whose `out` queue is drained.
/// - `metrics` / `position`: receive the outbound byte accounting
///   (`count_bytes_out_counter`, `count_bytes_out`).
/// - `readiness`: handed to `update_readiness_after_write`; also gets
///   `Ready::READABLE` interest re-inserted on the `cross_read_amount` path.
/// - `debug`, `debug_site`, `global_stream_id`: every write is recorded as
///   `DebugEvent::SocketIO(debug_site, global_stream_id, size)`.
/// - `wrote`: when provided, set to `true` as soon as one write is attempted.
/// - `cross_read_amount`: when provided, `Ready::READABLE` interest is
///   inserted once `kawa.storage.available_space()` reaches this amount.
/// - `io_slices`: caller-owned scratch vector, cleared before and after each
///   write so no slice outlives the buffer it points into.
/// - `bytes_written`: when provided, accumulates (saturating) the bytes
///   reported written by the socket.
///
/// Returns [`FlushOutcome::Stalled`] as soon as
/// `update_readiness_after_write` returns `true` (presumably the socket
/// cannot accept more right now — confirm against that helper), and
/// [`FlushOutcome::Drained`] once `kawa.out` is empty.
#[allow(clippy::too_many_arguments)]
fn flush_stream_out(
    socket: &mut Front,
    kawa: &mut GenericHttpStream,
    metrics: &mut SessionMetrics,
    position: &Position,
    readiness: &mut Readiness,
    debug: &mut DebugHistory,
    debug_site: usize,
    global_stream_id: GlobalStreamId,
    mut wrote: Option<&mut bool>,
    cross_read_amount: Option<usize>,
    io_slices: &mut Vec<IoSlice<'static>>,
    mut bytes_written: Option<&mut usize>,
) -> FlushOutcome {
    while !kawa.out.is_empty() {
        // Report the write attempt to the caller, even if it ends up short.
        if let Some(flag) = wrote.as_deref_mut() {
            *flag = true;
        }
        // Start from an empty scratch vector: it is reused across iterations
        // and across calls.
        io_slices.clear();
        let buffer = kawa.storage.buffer();
        // Only the run of `Store` blocks before the first `Delimiter` is
        // submitted in this write.
        for block in kawa.out.iter() {
            match block {
                kawa::OutBlock::Delimiter => break,
                kawa::OutBlock::Store(store) => {
                    let data = store.data(buffer);
                    // SAFETY: the IoSlice references point into kawa's
                    // storage buffer. They are used only for the
                    // socket_write_vectored call below and cleared
                    // immediately after, before kawa.consume() which may
                    // relocate the buffer via ptr::copy (shift). No
                    // dangling 'static refs exist during consume().
                    let data: &'static [u8] =
                        unsafe { std::slice::from_raw_parts(data.as_ptr(), data.len()) };
                    io_slices.push(IoSlice::new(data));
                }
            }
        }
        let (size, status) = socket.socket_write_vectored(io_slices);
        // Drop the lifetime-extended slices BEFORE touching `kawa` again; the
        // SAFETY argument above depends on this ordering.
        io_slices.clear();
        debug_assert!(
            io_slices.is_empty(),
            "IoSlice refs must be cleared before consume"
        );
        debug.push(DebugEvent::SocketIO(debug_site, global_stream_id, size));
        kawa.consume(size);
        position.count_bytes_out_counter(size);
        position.count_bytes_out(metrics, size);
        if let Some(counter) = bytes_written.as_deref_mut() {
            *counter = counter.saturating_add(size);
        }
        if let Some(amount) = cross_read_amount {
            // Resume path: same stream is parked waiting for buffer space.
            // Re-enable READABLE once the write freed enough room.
            if kawa.storage.available_space() >= amount {
                readiness.interest.insert(Ready::READABLE);
            }
        }
        // A `true` result ends the flush early with data still queued.
        if update_readiness_after_write(size, status, readiness) {
            return FlushOutcome::Stalled;
        }
    }
    FlushOutcome::Drained
}
3344
3345 fn handle_1xx_reset<E: Endpoint>(
3346 kawa: &mut GenericHttpStream,
3347 stream_state: StreamState,
3348 endpoint: &mut E,
3349 ) -> bool {
3350 let is_1xx = matches!(
3351 kawa.detached.status_line,
3352 kawa::StatusLine::Response { code, .. } if (100..200).contains(&code)
3353 );
3354 if !is_1xx {
3355 return false;
3356 }
3357 debug!(
3358 "{} H2 write_streams: 1xx informational forwarded, resetting back buffer",
3359 log_module_context!()
3360 );
3361 kawa.clear();
3362 if let StreamState::Linked(token) = stream_state {
3363 let readiness = endpoint.readiness_mut(token);
3364 readiness.interest.insert(Ready::READABLE);
3365 readiness.signal_pending_read();
3366 }
3367 true
3368 }
3369
3370 /// Re-arm edge-triggered WRITABLE event if rustls still has buffered TLS data.
3371 fn ensure_tls_flushed(&mut self) {
3372 if self.socket.socket_wants_write() {
3373 self.readiness.signal_pending_write();
3374 }
3375 }
3376
3377 /// Evict every per-stream piece of state carried by this `ConnectionH2`.
3378 ///
3379 /// **Invariant**: `rst_sent`, `stream_last_activity_at`,
3380 /// `stream_fc_stalled_since`, `stream_fc_stalled_progress` and `prioriser`
3381 /// MUST be emptied of `stream_id` here — they are the only five per-stream
3382 /// caches that are not stored in the slab-allocated
3383 /// `Context.streams[]`. Forgetting any of them causes unbounded memory
3384 /// growth on long-lived connections with many cancelled streams. The
3385 /// `debug_assert`s below fail loudly in test builds if someone adds a
3386 /// new per-stream cache without updating this function.
3387 fn remove_dead_stream(&mut self, stream_id: StreamId, global_stream_id: GlobalStreamId) {
3388 if self.streams.remove(&stream_id).is_none() {
3389 error!(
3390 "{} dead stream_id {} missing from streams map",
3391 log_context!(self),
3392 stream_id
3393 );
3394 }
3395 self.rst_sent.remove(&stream_id);
3396 self.stream_last_activity_at.remove(&stream_id);
3397 self.stream_fc_stalled_since.remove(&stream_id);
3398 self.stream_fc_stalled_progress.remove(&stream_id);
3399 self.prioriser.remove(&stream_id);
3400 debug_assert!(
3401 !self.rst_sent.contains(&stream_id),
3402 "rst_sent still contains stream_id {stream_id} after eviction"
3403 );
3404 debug_assert!(
3405 !self.stream_last_activity_at.contains_key(&stream_id),
3406 "stream_last_activity_at still contains stream_id {stream_id} after eviction"
3407 );
3408 debug_assert!(
3409 !self.stream_fc_stalled_since.contains_key(&stream_id),
3410 "stream_fc_stalled_since still contains stream_id {stream_id} after eviction"
3411 );
3412 debug_assert!(
3413 !self.stream_fc_stalled_progress.contains_key(&stream_id),
3414 "stream_fc_stalled_progress still contains stream_id {stream_id} after eviction"
3415 );
3416 // Invariant: expect_write/expect_read must not reference a gid whose
3417 // context slot may be popped by shrink_trailing_recycle after eviction.
3418 if matches!(self.expect_write, Some(H2StreamId::Other { gid, .. }) if gid == global_stream_id)
3419 {
3420 self.expect_write = None;
3421 }
3422 if matches!(
3423 self.expect_read,
3424 Some((H2StreamId::Other { gid, .. }, _)) if gid == global_stream_id
3425 ) {
3426 self.expect_read = None;
3427 }
3428 }
3429
3430 /// Drop stream-id mappings for streams that never became active before a
3431 /// connection-level close. This happens on incomplete/oversized header
3432 /// blocks: the stream slot is created on the initial HEADERS frame, then a
3433 /// GOAWAY closes the connection before the request is fully materialized.
3434 fn prune_inactive_streams_while_closing<L>(&mut self, context: &mut Context<L>)
3435 where
3436 L: ListenerHandler + L7ListenerHandler,
3437 {
3438 if !self.drain.draining || !matches!(self.state, H2State::GoAway | H2State::Error) {
3439 return;
3440 }
3441
3442 let stale_streams = self
3443 .streams
3444 .iter()
3445 .filter_map(|(&stream_id, &global_stream_id)| {
3446 (!context.streams[global_stream_id].state.is_open())
3447 .then_some((stream_id, global_stream_id))
3448 })
3449 .collect::<Vec<_>>();
3450
3451 for (stream_id, global_stream_id) in stale_streams {
3452 let stream = &mut context.streams[global_stream_id];
3453 if stream.state == StreamState::Idle {
3454 stream.front.clear();
3455 stream.front.storage.clear();
3456 stream.back.clear();
3457 stream.back.storage.clear();
3458 stream.metrics.reset();
3459 stream.state = StreamState::Recycle;
3460 }
3461 self.remove_dead_stream(stream_id, global_stream_id);
3462 }
3463 }
3464
3465 /// Shrink reusable converter buffers when they grow beyond 16 KB to avoid
3466 /// holding memory after a burst of large headers.
3467 fn shrink_converter_buffers(&mut self) {
3468 if self.converter_buf.capacity() > 16_384 {
3469 self.converter_buf.shrink_to(4096);
3470 }
3471 if self.lowercase_buf.capacity() > 16_384 {
3472 self.lowercase_buf.shrink_to(4096);
3473 }
3474 if self.cookie_buf.capacity() > 16_384 {
3475 self.cookie_buf.shrink_to(4096);
3476 }
3477 }
3478
3479 /// Post-write phase: check drain completion, flush TLS, and update readiness.
3480 ///
3481 /// `bytes_written_this_pass` reports the total outbound bytes `write_streams`
3482 /// pushed to the socket (across every stream), and is used to distinguish
3483 /// two very different "no `expect_write`" states:
3484 ///
3485 /// - **Voluntary yield with progress**: at least one DATA/HEADERS frame
3486 /// emitted, but a stream left non-empty `back.out`/`back.blocks` because
3487 /// the converter yielded (e.g. RFC 9218 incremental rotation). LIFECYCLE
3488 /// §9 invariant 16: keep `Ready::WRITABLE` armed so the session loop can
3489 /// resume flushing on the next tick without waiting for an external
3490 /// wake-up that edge-triggered epoll will not deliver.
3491 /// - **No progress at all**: converter pushed every block back (e.g. flow
3492 /// window exhausted, no HEADERS ready yet). Strip `Ready::WRITABLE` —
3493 /// forward progress must come from an external trigger
3494 /// (`WINDOW_UPDATE`, new request), not from looping writable().
3495 ///
3496 /// Returns `MuxResult::Continue` in the normal case, or triggers a graceful
3497 /// GOAWAY when draining and all streams have completed.
3498 fn finalize_write<L>(
3499 &mut self,
3500 socket_write: bool,
3501 bytes_written_this_pass: usize,
3502 context: &mut Context<L>,
3503 ) -> MuxResult
3504 where
3505 L: ListenerHandler + L7ListenerHandler,
3506 {
3507 // RFC 9113 §6.8: if draining and all streams have completed,
3508 // send the final GOAWAY with the actual last_stream_id
3509 if self.drain.draining && self.streams.is_empty() {
3510 return self.graceful_goaway();
3511 }
3512
3513 if self.socket.socket_wants_write() {
3514 if !socket_write {
3515 self.socket.socket_write(&[]);
3516 }
3517 // Edge-triggered epoll: re-arm WRITABLE if rustls still has
3518 // pending encrypted data (first check triggers flush, second re-checks).
3519 self.ensure_tls_flushed();
3520 } else if self.expect_write.is_none() {
3521 // LIFECYCLE §9 invariant 16: retain `Ready::WRITABLE` when a
3522 // voluntary scheduler yield leaves stranded bytes in a stream's
3523 // `back.out`/`back.blocks` *after* the pass made forward
3524 // progress. Requiring progress avoids the degenerate no-progress
3525 // loop (e.g. flow-control-starved streams) that would otherwise
3526 // busy-spin against the session dispatcher.
3527 if bytes_written_this_pass > 0
3528 && any_stream_has_pending_back(&self.streams, &context.streams)
3529 {
3530 #[cfg(debug_assertions)]
3531 context.debug.push(DebugEvent::Str(
3532 "finalize_write: invariant 16 retained WRITABLE (pending back-buffer)"
3533 .to_owned(),
3534 ));
3535 } else if !self.pending_rst_streams.is_empty()
3536 || !self.flow_control.pending_window_updates.is_empty()
3537 {
3538 // Control-frame liveness: `flush_pending_control_frames` is
3539 // gated on `expect_write.is_none()`, so when a prior partial
3540 // write deferred the flush the RST / WINDOW_UPDATE queues
3541 // stay non-empty after `expect_write` finally drains. Without
3542 // this rearm the next tick would drop `Ready::WRITABLE` and
3543 // the queued RST would stall until an unrelated event
3544 // re-triggered writable — which is exactly the scenario
3545 // h2spec trips by sending back-to-back malformed streams.
3546 #[cfg(debug_assertions)]
3547 context.debug.push(DebugEvent::Str(
3548 "finalize_write: retained WRITABLE (control queue non-empty)".to_owned(),
3549 ));
3550 self.readiness.arm_writable();
3551 incr!(names::h2::SIGNAL_WRITABLE_REARMED_CONTROL_QUEUE);
3552 } else {
3553 // We wrote everything
3554 #[cfg(debug_assertions)]
3555 context.debug.push(DebugEvent::Str(format!(
3556 "Wrote everything: {:?}",
3557 self.streams
3558 )));
3559 self.readiness.interest.remove(Ready::WRITABLE);
3560 }
3561 }
3562 MuxResult::Continue
3563 }
3564
3565 /// Flush pending control frames (zero-buffer resume, WINDOW_UPDATEs, RST_STREAMs)
3566 /// before entering the main writable state machine.
3567 ///
3568 /// Returns `Some(result)` if the caller should return early (e.g. socket would
3569 /// block, GOAWAY triggered), or `None` if writable() should proceed normally.
3570 fn flush_pending_control_frames(&mut self) -> Option<MuxResult> {
3571 if self.frontend_hung_up_while_draining() {
3572 self.expect_write = None;
3573 self.zero.storage.clear();
3574 self.flow_control.pending_window_updates.clear();
3575 self.pending_rst_streams.clear();
3576 }
3577
3578 // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
3579 if let Some(sent_at) = self.settings_sent_at {
3580 if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
3581 warn!(
3582 "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
3583 log_context!(self),
3584 SETTINGS_ACK_TIMEOUT
3585 );
3586 return Some(self.goaway(H2Error::SettingsTimeout));
3587 }
3588 }
3589
3590 // Stage — resume zero-buffer flush.
3591 // If a previous write was partial, finish it before serialising any
3592 // new control frames. Don't reset the timeout for control frame
3593 // writes (SETTINGS ACK, PING response, WINDOW_UPDATE) — only
3594 // application-data writes should reset it.
3595 if let Some(H2StreamId::Zero) = self.expect_write {
3596 if self.flush_zero_to_socket() {
3597 self.ensure_tls_flushed();
3598 return Some(MuxResult::Continue);
3599 }
3600 // When H2StreamId::Zero is used to write, READABLE is disabled —
3601 // re-enable it now that the flush is complete.
3602 self.readiness.interest.insert(Ready::READABLE);
3603 self.expect_write = None;
3604 }
3605
3606 // Stage — drain pending WINDOW_UPDATE frames.
3607 // Serialize and flush them inline to avoid extra event loop
3608 // iterations that could cause response data to be sent before
3609 // subsequent frames are validated.
3610 if !self.flow_control.pending_window_updates.is_empty() && self.expect_write.is_none() {
3611 let kawa = &mut self.zero;
3612 kawa.storage.clear();
3613 let buf = kawa.storage.space();
3614 let mut offset = 0;
3615 // Track which entries we successfully serialized so we can remove them.
3616 // Each WINDOW_UPDATE frame is 13 bytes (9-byte header + 4-byte payload).
3617 let mut written_ids = Vec::new();
3618 for (&stream_id, &increment) in &self.flow_control.pending_window_updates {
3619 if increment == 0 {
3620 written_ids.push(stream_id);
3621 continue;
3622 }
3623 match serializer::gen_window_update(&mut buf[offset..], stream_id, increment) {
3624 Ok((_, size)) => {
3625 offset += size;
3626 written_ids.push(stream_id);
3627 incr!(names::h2::FRAMES_TX_WINDOW_UPDATE);
3628 }
3629 Err(_) => {
3630 // Buffer full — stop here, remaining entries stay in the map
3631 break;
3632 }
3633 }
3634 }
3635 // Remove only the entries we successfully wrote (or skipped)
3636 for id in written_ids {
3637 self.flow_control.pending_window_updates.remove(&id);
3638 }
3639 if offset > 0 {
3640 kawa.storage.fill(offset);
3641 if self.flush_zero_to_socket() {
3642 self.expect_write = Some(H2StreamId::Zero);
3643 // Edge-triggered epoll: ensure pending TLS data gets flushed
3644 if self.socket.socket_wants_write() {
3645 self.readiness.event.insert(Ready::WRITABLE);
3646 }
3647 return Some(MuxResult::Continue);
3648 }
3649 }
3650 }
3651
3652 // Stage — RST_STREAM cap check + drain.
3653 // Check the lifetime total (not just pending queue length) because
3654 // writable() drains the queue between readable() calls, so the
3655 // pending count alone may never reach the cap even under sustained
3656 // misbehavior.
3657 if !matches!(self.state, H2State::GoAway | H2State::Error)
3658 && self.total_rst_streams_queued >= MAX_PENDING_RST_STREAMS
3659 {
3660 error!(
3661 "{} total RST_STREAM count {} exceeds cap {}, sending GOAWAY(ENHANCE_YOUR_CALM)",
3662 log_context!(self),
3663 self.total_rst_streams_queued,
3664 MAX_PENDING_RST_STREAMS
3665 );
3666 return Some(self.goaway(H2Error::EnhanceYourCalm));
3667 }
3668
3669 // Flush pending RST_STREAM frames (queued when refusing streams).
3670 // Accounting happens at queue-time inside `Self::enqueue_rst`, so
3671 // this drain only serialises and flushes — no metric/flood calls
3672 // here would double-count.
3673 if !self.pending_rst_streams.is_empty() && self.expect_write.is_none() {
3674 let kawa = &mut self.zero;
3675 kawa.storage.clear();
3676 let buf = kawa.storage.space();
3677 let mut offset = 0;
3678 let mut written_count = 0;
3679 for &(stream_id, ref error) in &self.pending_rst_streams {
3680 let frame_size =
3681 parser::FRAME_HEADER_SIZE + parser::RST_STREAM_PAYLOAD_SIZE as usize;
3682 if offset + frame_size > buf.len() {
3683 break;
3684 }
3685 match serializer::gen_rst_stream(&mut buf[offset..], stream_id, error.to_owned()) {
3686 Ok((_, _)) => {
3687 offset += frame_size;
3688 written_count += 1;
3689 }
3690 Err(_) => break,
3691 }
3692 }
3693 self.pending_rst_streams.drain(..written_count);
3694 if offset > 0 {
3695 kawa.storage.fill(offset);
3696 if self.flush_zero_to_socket() {
3697 self.expect_write = Some(H2StreamId::Zero);
3698 // Edge-triggered epoll: ensure pending TLS data gets flushed
3699 if self.socket.socket_wants_write() {
3700 self.readiness.event.insert(Ready::WRITABLE);
3701 }
3702 return Some(MuxResult::Continue);
3703 }
3704 }
3705 }
3706
3707 None
3708 }
3709
3710 pub fn writable<E, L>(&mut self, context: &mut Context<L>, endpoint: E) -> MuxResult
3711 where
3712 E: Endpoint,
3713 L: ListenerHandler + L7ListenerHandler,
3714 {
3715 self.prune_inactive_streams_while_closing(context);
3716
3717 if let Some(result) = self.flush_pending_control_frames() {
3718 return result;
3719 }
3720
3721 // Flush any pending TLS records before state-specific processing.
3722 // This ensures response DATA frames that were accepted by rustls
3723 // (via socket_write_vectored in write_streams) are pushed to the
3724 // TCP socket even when the connection is in GoAway or Error state.
3725 // Without this, the state-specific handlers may call force_disconnect()
3726 // before the response data reaches the kernel's TCP send buffer.
3727 if self.socket.socket_wants_write() {
3728 self.socket.socket_write(&[]);
3729 }
3730
3731 match (&self.state, &self.position) {
3732 (H2State::Error, Position::Server) => {
3733 if self.socket.socket_wants_write() {
3734 self.ensure_tls_flushed();
3735 MuxResult::Continue
3736 } else {
3737 MuxResult::CloseSession
3738 }
3739 }
3740 (H2State::Error, _)
3741 | (H2State::ClientSettings, Position::Server)
3742 | (H2State::ServerSettings, Position::Client(..)) => {
3743 error!(
3744 "{} Unexpected combination: (Writable, {:?}, {:?})",
3745 log_context!(self),
3746 self.state,
3747 self.position
3748 );
3749 self.force_disconnect()
3750 }
3751 (H2State::ClientPreface, Position::Server) => MuxResult::Continue,
3752 // Discard state: pending data (e.g. RST_STREAM) was already
3753 // written in the preamble above; let the readable path consume
3754 // the remaining frame payload.
3755 (H2State::Discard, _) => MuxResult::Continue,
3756 (H2State::GoAway, _) => {
3757 if self.peer_gone_after_final_goaway() {
3758 return MuxResult::CloseSession;
3759 }
3760 // Flush any remaining TLS response data before disconnecting.
3761 // The GoAway state only enters after control frames (our GOAWAY
3762 // response) are flushed above, but response DATA frames may still
3763 // be in rustls's TLS output buffer — accepted by socket_write_vectored
3764 // during write_streams() but not yet flushed to TCP. Under TCP
3765 // backpressure (HAProxy chain), this is the primary truncation vector.
3766 if self.socket.socket_wants_write() {
3767 self.socket.socket_write(&[]);
3768 if self.socket.socket_wants_write() {
3769 // TLS data still pending (TCP backpressure) — don't disconnect
3770 // yet. Re-arm WRITABLE so the event loop retries the flush.
3771 self.ensure_tls_flushed();
3772 return MuxResult::Continue;
3773 }
3774 }
3775 self.force_disconnect()
3776 }
3777 (H2State::ClientPreface, Position::Client(..)) => {
3778 trace!("{} Preparing preface and settings", log_context!(self));
3779 let pri = serializer::H2_PRI.as_bytes();
3780 let kawa = &mut self.zero;
3781
3782 kawa.storage.space()[0..pri.len()].copy_from_slice(pri);
3783 kawa.storage.fill(pri.len());
3784 match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
3785 Ok((_, size)) => {
3786 kawa.storage.fill(size);
3787 incr!(names::h2::FRAMES_TX_SETTINGS);
3788 // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
3789 self.settings_sent_at = Some(Instant::now());
3790 }
3791 Err(error) => {
3792 error!(
3793 "{} Could not serialize SettingsFrame: {:?}",
3794 log_context!(self),
3795 error
3796 );
3797 return self.force_disconnect();
3798 }
3799 };
3800
3801 self.state = H2State::ClientSettings;
3802 self.expect_write = Some(H2StreamId::Zero);
3803 MuxResult::Continue
3804 }
3805 (H2State::ClientSettings, Position::Client(..)) => {
3806 trace!("{} Sent preface and settings", log_context!(self));
3807 self.state = H2State::ServerSettings;
3808 self.expect_read = Some((H2StreamId::Zero, 9));
3809 self.readiness.interest.remove(Ready::WRITABLE);
3810 MuxResult::Continue
3811 }
3812 (H2State::ServerSettings, Position::Server) => {
// Enlarge the connection-level receive window beyond the RFC default
// of 65 535 bytes. That default is too small for high-throughput
// proxying and causes excessive WINDOW_UPDATE round-trips. Use an
// additive increment rather than an unconditional assignment to
// preserve any window changes that occurred during setup. Skip the
// update when the configured window does not exceed the default (no
// enlargement needed), since a zero-increment WINDOW_UPDATE
// violates RFC 9113 §6.9.
3821 let increment = self
3822 .connection_config
3823 .initial_connection_window
3824 .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
3825 if increment > 0 {
3826 self.queue_window_update(0, increment);
3827 }
3828 // Do NOT increment flow_control.window here: sending our own
3829 // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
3830 // Our send window is only updated by WINDOW_UPDATEs we receive
3831 // from the peer (RFC 9113 §6.9).
3832 self.expect_header();
3833 // Keep WRITABLE so the queued WINDOW_UPDATE gets flushed.
3834 MuxResult::Continue
3835 }
3836 // Proxying states — writing application data (request/response).
3837 // Reset the timeout here, not at the top of writable(), so that
3838 // control frame writes (PING, WINDOW_UPDATE) don't reset it.
3839 (H2State::Header, _)
3840 | (H2State::Frame(_), _)
3841 | (H2State::ContinuationFrame(_), _)
3842 | (H2State::ContinuationHeader(_), _) => self.write_streams(context, endpoint),
3843 }
3844 }
3845
3846 /// Snapshot the access-log RTTs for the local frontend and the linked backend.
3847 ///
3848 /// `Position::Server`-only. On a backend H2 connection (`Position::Client`)
3849 /// the snapshot would write swapped values onto the shared `Stream.metrics`:
3850 /// the connection's `socket` is the upstream and the corresponding
3851 /// `EndpointServer::socket` returns the frontend, so the per-stream
3852 /// `client_rtt`/`server_rtt` cells would be populated with mislabelled
3853 /// values. Gating keeps backend H2 from poisoning the access-log metric
3854 /// for the matching frontend stream.
3855 ///
3856 /// Callers must invoke this BEFORE `endpoint.end_stream(...)` on reset
3857 /// paths so the backend lookup does not depend on
3858 /// `EndpointClient::end_stream` continuing to leave entries in
3859 /// `Router.backends`.
3860 ///
3861 /// Takes individual field references (not `&self`) for the same reason
3862 /// `try_recycle_server_stream` does — to avoid borrow conflicts with the
3863 /// `H2BlockConverter` that holds `&mut self.encoder` during the per-stream
3864 /// write loop.
3865 fn snapshot_rtts<E: Endpoint>(
3866 position: &Position,
3867 socket: &Front,
3868 endpoint: &E,
3869 linked_token: Option<mio::Token>,
3870 ) -> (Option<Duration>, Option<Duration>) {
3871 if !position.is_server() {
3872 return (None, None);
3873 }
3874 (
3875 socket_rtt(socket.socket_ref()),
3876 linked_token
3877 .and_then(|t| endpoint.socket(t))
3878 .and_then(socket_rtt),
3879 )
3880 }
3881
3882 /// Try to recycle a completed server-side stream by distributing overhead,
3883 /// generating access logs, and transitioning the stream to `Recycle` state.
3884 ///
3885 /// Returns `Some((stream_id, Option<token>))` if the stream was recycled, so the
3886 /// caller can add `stream_id` to the dead-streams list and call `endpoint.end_stream()`
3887 /// if a token was returned. Returns `None` if recycling was deferred or not applicable.
3888 ///
3889 /// Takes individual field references instead of `&mut self` to avoid borrow
3890 /// conflicts when the H2 block converter holds `&mut self.encoder`.
3891 /// `client_rtt`/`server_rtt` are snapshotted by the caller (which still
3892 /// owns `&self.socket` and `&endpoint`) and forwarded into the access log.
3893 #[allow(clippy::too_many_arguments)]
3894 fn try_recycle_server_stream<L>(
3895 position: &Position,
3896 bytes: &mut H2ByteAccounting,
3897 streams: &HashMap<StreamId, GlobalStreamId>,
3898 stream: &mut crate::protocol::mux::Stream,
3899 global_stream_id: GlobalStreamId,
3900 stream_id: StreamId,
3901 byte_totals: (usize, usize),
3902 debug: &mut DebugHistory,
3903 listener: std::rc::Rc<std::cell::RefCell<L>>,
3904 client_rtt: Option<Duration>,
3905 server_rtt: Option<Duration>,
3906 ) -> Option<(StreamId, Option<mio::Token>)>
3907 where
3908 L: ListenerHandler + L7ListenerHandler,
3909 {
3910 match position {
3911 Position::Client(..) => None,
3912 Position::Server => {
3913 // Already logged by a reset path; retire the stream after its RST is flushed.
3914 if stream.metrics.start.is_none() {
3915 let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3916 return match state {
3917 StreamState::Linked(token) => Some((stream_id, Some(token))),
3918 _ => Some((stream_id, None)),
3919 };
3920 }
3921
3922 // Don't recycle if the client hasn't sent END_STREAM yet —
3923 // more DATA frames may arrive for this stream.
3924 if !stream.front_received_end_of_stream {
3925 trace!(
3926 "{} Defer recycle stream {}: client still sending",
3927 log_module_context!(),
3928 global_stream_id
3929 );
3930 return None;
3931 }
3932 let stream_bytes = (
3933 stream.metrics.bin + stream.metrics.backend_bin,
3934 stream.metrics.bout + stream.metrics.backend_bout,
3935 );
3936 distribute_overhead(
3937 &mut stream.metrics,
3938 &mut bytes.overhead_bin,
3939 &mut bytes.overhead_bout,
3940 stream_bytes,
3941 byte_totals,
3942 streams.len(),
3943 streams.len() == 1,
3944 );
3945 debug.push(DebugEvent::StreamEvent(4, global_stream_id));
3946 trace!(
3947 "{} Recycle stream: {}",
3948 log_module_context!(),
3949 global_stream_id
3950 );
3951 let token = Self::complete_server_stream(stream, listener, client_rtt, server_rtt);
3952 Some((stream_id, token))
3953 }
3954 }
3955 }
3956
3957 /// Finalize a server-side stream after its response has been fully written.
3958 ///
3959 /// Generates an access log, resets metrics, and transitions the stream to `Recycle`.
3960 /// Returns the backend token if the stream was `Linked`, so the caller can call
3961 /// `endpoint.end_stream()` with the full `Context` (which can't be passed here
3962 /// because `stream` borrows from `context.streams`).
3963 ///
3964 /// Callers must distribute overhead *before* calling this, since the converter
3965 /// borrow may prevent `distribute_overhead()`.
3966 fn complete_server_stream<L>(
3967 stream: &mut crate::protocol::mux::Stream,
3968 listener: std::rc::Rc<std::cell::RefCell<L>>,
3969 client_rtt: Option<Duration>,
3970 server_rtt: Option<Duration>,
3971 ) -> Option<mio::Token>
3972 where
3973 L: ListenerHandler + L7ListenerHandler,
3974 {
3975 incr!(names::http::E2E_H2);
3976 stream.metrics.backend_stop();
3977 stream.generate_access_log(
3978 false,
3979 Some("H2::Complete"),
3980 listener,
3981 client_rtt,
3982 server_rtt,
3983 );
3984 stream.metrics.reset();
3985 let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3986 if let StreamState::Linked(token) = state {
3987 Some(token)
3988 } else {
3989 None
3990 }
3991 }
3992
3993 /// Compute the total bytes transferred across all active streams.
3994 ///
3995 /// Returns `(total_bytes_in, total_bytes_out)` where bytes_in = `bin + backend_bin`
3996 /// and bytes_out = `bout + backend_bout` for each stream.
3997 fn compute_stream_byte_totals<L: ListenerHandler + L7ListenerHandler>(
3998 &self,
3999 context: &Context<L>,
4000 ) -> (usize, usize) {
4001 let mut total_in = 0usize;
4002 let mut total_out = 0usize;
4003 for &gid in self.streams.values() {
4004 let m = &context.streams[gid].metrics;
4005 total_in += m.bin + m.backend_bin;
4006 total_out += m.bout + m.backend_bout;
4007 }
4008 (total_in, total_out)
4009 }
4010
4011 /// Distribute connection-level byte overhead proportionally to a single stream.
4012 ///
4013 /// `totals` should be pre-computed via [`compute_stream_byte_totals`] **before**
4014 /// taking a mutable borrow on the target stream, to avoid borrow conflicts.
4015 /// Delegates to the free function [`distribute_overhead`].
4016 fn distribute_overhead(&mut self, metrics: &mut SessionMetrics, totals: (usize, usize)) {
4017 let stream_bytes = (
4018 metrics.bin + metrics.backend_bin,
4019 metrics.bout + metrics.backend_bout,
4020 );
4021 distribute_overhead(
4022 metrics,
4023 &mut self.bytes.overhead_bin,
4024 &mut self.bytes.overhead_bout,
4025 stream_bytes,
4026 totals,
4027 self.streams.len(),
4028 self.streams.len() <= 1,
4029 );
4030 }
4031
4032 /// Attribute accumulated `zero_bytes_read` to the stream or to connection overhead.
4033 fn attribute_bytes_to_stream(&mut self, metrics: &mut SessionMetrics) {
4034 self.position
4035 .count_bytes_in(metrics, self.bytes.zero_bytes_read);
4036 self.bytes.zero_bytes_read = 0;
4037 }
4038
4039 fn attribute_bytes_to_overhead(&mut self) {
4040 self.bytes.overhead_bin += self.bytes.zero_bytes_read;
4041 self.bytes.zero_bytes_read = 0;
4042 }
4043
4044 /// Queue a WINDOW_UPDATE, coalescing with any existing entry for the same stream_id.
4045 /// RFC 9113 §6.9.1: window size increment MUST be 1..2^31-1 (0x7FFFFFFF).
4046 ///
4047 /// Always signals pending write so callers don't have to remember the
4048 /// edge-triggered epoll invariant (see memory feedback_epollet_signal_pending_write):
4049 /// under ET epoll a queued WINDOW_UPDATE without a live WRITABLE event bit
4050 /// is invisible to filter_interest() and will never get flushed.
4051 fn queue_window_update(&mut self, stream_id: u32, increment: u32) {
4052 let max_increment = i32::MAX as u32;
4053 if let Some(existing) = self.flow_control.pending_window_updates.get_mut(&stream_id) {
4054 let old = *existing;
4055 *existing = existing.saturating_add(increment).min(max_increment);
4056 // Coalescing invariant: the accumulated increment never decreases
4057 // and never exceeds i32::MAX (RFC 9113 §6.9 caps a WINDOW_UPDATE
4058 // increment at 2^31-1; emitting a larger value would be a protocol
4059 // error on the wire).
4060 debug_assert!(
4061 *existing >= old,
4062 "coalesced WINDOW_UPDATE increment must be monotonic non-decreasing"
4063 );
4064 debug_assert!(
4065 *existing <= max_increment,
4066 "coalesced WINDOW_UPDATE increment must stay within i32::MAX"
4067 );
4068 trace!(
4069 "{} WINDOW_UPDATE coalesced: stream={} old={} new={}",
4070 log_context!(self),
4071 stream_id,
4072 old,
4073 *existing
4074 );
4075 } else if self.flow_control.pending_window_updates.len() < self.max_pending_window_updates {
4076 self.flow_control
4077 .pending_window_updates
4078 .insert(stream_id, increment.min(max_increment));
4079 trace!(
4080 "{} WINDOW_UPDATE queued: stream={} increment={}",
4081 log_context!(self),
4082 stream_id,
4083 increment.min(max_increment)
4084 );
4085 } else {
4086 error!(
4087 "{} WINDOW_UPDATE dropped: queue full ({} entries), stream={} increment={}",
4088 log_context!(self),
4089 self.max_pending_window_updates,
4090 stream_id,
4091 increment
4092 );
4093 incr!(names::h2::WINDOW_UPDATE_DROPPED);
4094 }
4095 self.readiness.arm_writable();
4096 }
4097
4098 /// Re-enable READABLE if this connection is parked waiting for buffer space
4099 /// and the target stream's buffer now has enough room.
4100 ///
4101 /// This is the cross-readiness counterpart to the same-connection check in
4102 /// `writable()`. When the *other side* of a stream (frontend or backend)
4103 /// drains data via its own `writable()`, it frees buffer space that this
4104 /// connection was waiting for. Without this explicit wake-up the connection
4105 /// stays parked and the session deadlocks until a timeout fires.
4106 ///
4107 /// Returns `true` if READABLE was re-enabled.
4108 pub fn try_resume_reading<L>(&mut self, context: &Context<L>) -> bool
4109 where
4110 L: ListenerHandler + L7ListenerHandler,
4111 {
4112 if let Some((
4113 H2StreamId::Other {
4114 gid: global_stream_id,
4115 ..
4116 },
4117 amount,
4118 )) = self.expect_read
4119 {
4120 let stream = &context.streams[global_stream_id];
4121 let kawa = match self.position {
4122 Position::Client(..) => &stream.back,
4123 Position::Server => &stream.front,
4124 };
4125 if kawa.storage.available_space() >= amount {
4126 self.readiness.interest.insert(Ready::READABLE);
4127 return true;
4128 }
4129 }
4130 false
4131 }
4132
4133 /// Mark a stream's position-appropriate end-of-stream flag.
4134 ///
4135 /// Server reads from the front (client), so sets `front_received_end_of_stream`.
4136 /// Client reads from the back (backend), so sets `back_received_end_of_stream`.
4137 fn mark_end_of_stream(&self, stream: &mut crate::protocol::mux::Stream) {
4138 if self.position.is_server() {
4139 stream.front_received_end_of_stream = true;
4140 } else {
4141 stream.back_received_end_of_stream = true;
4142 }
4143 }
4144
/// Cancel streams that have been idle longer than [`Self::stream_idle_timeout`].
///
/// A stream is considered idle when no meaningful application data (non-empty
/// DATA frames or HEADERS) has been received since the last activity timestamp
/// in [`Self::stream_last_activity_at`]. Streams tracked in
/// `stream_fc_stalled_since` are reaped on the same deadline (see the comment
/// above the `collect_timed_out_streams` call below).
///
/// Mitigates slow-multiplex Slowloris (Pass 4 Medium #3): the connection-level
/// idle timer resets on every frame, so a peer sending periodic control frames
/// can pin `max_concurrent_streams` slots for the full nominal connection timeout.
/// Per-stream idle deadlines guarantee each stream terminates if it stops making
/// forward progress, regardless of connection-level liveness.
///
/// Timed-out streams receive RST_STREAM(CANCEL) and are immediately removed
/// from the streams map so they no longer count against MAX_CONCURRENT_STREAMS.
/// Backend endpoints are notified and metrics are finalized.
///
/// As a side job, every call first trims oversized per-connection scratch
/// buffers, before any early return.
///
/// # Parameters
/// * `context` – owner of the stream storage (`context.streams`) and of the
///   listener used for access logs.
/// * `endpoint` – used to look up the linked backend socket for the RTT
///   snapshot and to `end_stream` linked backends.
pub fn cancel_timed_out_streams<E, L>(&mut self, context: &mut Context<L>, endpoint: &mut E)
where
    E: Endpoint,
    L: ListenerHandler + L7ListenerHandler,
{
    // Per-connection scratch buffers (`converter_buf`, `lowercase_buf`,
    // `cookie_buf`, `priorities_buf`) grow to a high-water mark and never
    // shrink on their own. On a long-lived H2 connection that briefly
    // carried a flurry of large headers, the backing memory would stay
    // pinned indefinitely. Each buffer is checked independently: when its
    // capacity exceeds 4× `SCRATCH_BUF_RETAIN` it is shrunk back towards
    // `SCRATCH_BUF_RETAIN`. This runs at the top of every
    // `cancel_timed_out_streams` invocation, regardless of whether any
    // stream is live or timed out.
    const SCRATCH_BUF_RETAIN: usize = 16 * 1024;
    if self.converter_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
        self.converter_buf.shrink_to(SCRATCH_BUF_RETAIN);
    }
    if self.lowercase_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
        self.lowercase_buf.shrink_to(SCRATCH_BUF_RETAIN);
    }
    if self.cookie_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
        self.cookie_buf.shrink_to(SCRATCH_BUF_RETAIN);
    }
    if self.priorities_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
        self.priorities_buf.shrink_to(SCRATCH_BUF_RETAIN);
    }

    // Nothing to reap: no streams at all, or neither per-stream guard is
    // tracking anything.
    if self.streams.is_empty()
        || (self.stream_last_activity_at.is_empty() && self.stream_fc_stalled_since.is_empty())
    {
        return;
    }
    let now = Instant::now();
    let deadline = self.stream_idle_timeout;
    // Two independent per-stream guards reap on the same deadline — see
    // `collect_timed_out_streams`. The flow-control-stall guard
    // (`stream_fc_stalled_since`) closes the HTTP/2 window-stall vector that
    // the bidirectional liveness guard (`stream_last_activity_at`) misses,
    // because an inbound DATA drip keeps the liveness timer warm while the
    // response stays window-blocked.
    let timed_out = collect_timed_out_streams(
        &self.stream_last_activity_at,
        &self.stream_fc_stalled_since,
        &self.streams,
        &self.rst_sent,
        now,
        deadline,
    );
    if timed_out.is_empty() {
        return;
    }
    for (sid, reason) in timed_out {
        info!(
            "{} H2 stream {} exceeded {:?} ({}), cancelling",
            log_context!(self),
            sid,
            deadline,
            reason
        );
        // M1: break reaps down by guard so a window-stall reap (a DoS
        // mitigation) is distinguishable from an ordinary idle reap on a
        // dashboard. M2: a window-stall reap whose stream dribbled some
        // outbound progress (`acc > 0`) below the floor is specifically a
        // stall-budget reap — the `WINDOW_UPDATE`-drip vector the budget
        // closes — counted as a subset. Read the accumulator BEFORE
        // `remove_dead_stream` evicts it below.
        match reason {
            "H2::WindowStall" => {
                count!(names::h2::STREAMS_REAPED_WINDOW_STALL, 1);
                if matches!(self.stream_fc_stalled_progress.get(&sid), Some(&acc) if acc > 0) {
                    count!(names::h2::STREAMS_REAPED_STALL_BUDGET, 1);
                }
            }
            "H2::IdleTimeout" => count!(names::h2::STREAMS_REAPED_IDLE_TIMEOUT, 1),
            other => debug!("{} unexpected reap reason {}", log_context!(self), other),
        }
        // Route through the canonical chokepoint so dedupe (rst_sent),
        // queued-cap accounting (MAX_PENDING_RST_STREAMS via
        // total_rst_streams_queued), and edge-triggered-epoll arming
        // (Readiness::arm_writable) all stay consistent — see LIFECYCLE
        // §8.2. Pushing onto the pending queue directly would bypass all
        // three: a peer that opens 200 streams and lets them all idle past
        // stream_idle_timeout could push past the queued cap silently
        // (no GOAWAY(ENHANCE_YOUR_CALM) escalation), and a double-cancel
        // pass would grow pending_rst_streams instead of short-circuiting
        // on the existing rst_sent membership. Counting these RSTs against
        // the cap is deliberate: 200 cumulative idle cancellations from one
        // peer IS abusive (pinning MAX_CONCURRENT_STREAMS slots), and the
        // GOAWAY(ENHANCE_YOUR_CALM) escalation tells the peer to reconnect
        // with a clean state.
        //
        // We deliberately ignore the `Option<MuxResult>` flood-violation
        // signal here — `cancel_timed_out_streams` returns `()` and is
        // called as best-effort housekeeping during the read path. A
        // flood violation that becomes visible mid-iteration will be
        // re-detected on the next `record_rst_emitted` call (the
        // counter is sticky), so dropping the early-return is safe.
        let _ = self.enqueue_rst(sid, H2Error::Cancel);

        // Remove from streams map and recycle the context stream so the slot
        // no longer counts against MAX_CONCURRENT_STREAMS.
        // Compute totals per-stream before remove (matches RST_STREAM handler).
        let byte_totals = self.compute_stream_byte_totals(context);
        if let Some(global_stream_id) = self.streams.get(&sid).copied() {
            {
                // Scoped so the mutable stream borrow ends before `context`
                // is used again below.
                let stream = &mut context.streams[global_stream_id];
                self.attribute_bytes_to_stream(&mut stream.metrics);
            }
            // Check if stream is linked to a backend — borrow must be scoped
            // so end_stream can take &mut context.
            let linked_token = context.streams[global_stream_id].linked_token();
            // Snapshot RTTs before `end_stream`, as `snapshot_rtts` requires
            // on reset paths.
            let (client_rtt, server_rtt) =
                Self::snapshot_rtts(&self.position, &self.socket, &*endpoint, linked_token);
            if let Some(token) = linked_token {
                endpoint.end_stream(token, global_stream_id, context);
            }
            let stream = &mut context.streams[global_stream_id];
            match &self.position {
                // Connected backend connection: release the request slot it
                // was counting for this stream.
                Position::Client(_, backend, BackendStatus::Connected) => {
                    let mut backend_borrow = backend.borrow_mut();
                    backend_borrow.active_requests =
                        backend_borrow.active_requests.saturating_sub(1);
                }
                Position::Client(..) => {}
                // Frontend connection: finalize metrics, emit the error
                // access log tagged with the reap reason, then recycle.
                Position::Server => {
                    self.distribute_overhead(&mut stream.metrics, byte_totals);
                    stream.metrics.backend_stop();
                    stream.generate_access_log(
                        true,
                        Some(reason),
                        context.listener.clone(),
                        client_rtt,
                        server_rtt,
                    );
                    stream.state = StreamState::Recycle;
                }
            }
            // Retire sid from streams/prioriser/stream_last_activity_at and
            // invalidate expect_write/expect_read if they reference this gid.
            self.remove_dead_stream(sid, global_stream_id);
        }
    }
    // Writable arming is already done by enqueue_rst -> arm_writable in
    // the loop above, so no explicit arming is needed here.
}
4311
4312 /// Queue a `RST_STREAM` frame for serialisation by
4313 /// [`Self::flush_pending_control_frames`] on the next writable tick.
4314 ///
4315 /// This is the canonical entry point for proxy-emitted stream resets:
4316 /// `DATA` on a closed stream, `MAX_CONCURRENT_STREAMS` refusal, and the
4317 /// per-stream error paths in [`Self::reset_stream`] all funnel through
4318 /// here. Serialisation is independent of the owning `Stream` still
4319 /// existing in `self.streams`, which is what lets us emit even after a
4320 /// caller has already called [`Self::remove_dead_stream`].
4321 ///
4322 /// Delegates the primitive work to [`enqueue_rst_into`] so the invariants
4323 /// are covered by unit tests that don't need a full `ConnectionH2`
4324 /// fixture. See that function's doc-comment for the three invariants
4325 /// (dedupe via `rst_sent`, MadeYouReset queued cap via
4326 /// `total_rst_streams_queued`, edge-triggered-epoll arm via
4327 /// [`Readiness::arm_writable`]).
4328 fn enqueue_rst(&mut self, wire_stream_id: StreamId, error: H2Error) -> Option<MuxResult> {
4329 let freshly_queued = enqueue_rst_into(
4330 &mut self.pending_rst_streams,
4331 &mut self.total_rst_streams_queued,
4332 &mut self.rst_sent,
4333 &mut self.readiness,
4334 wire_stream_id,
4335 error,
4336 );
4337 // Account ONLY when a new RST actually entered the queue.
4338 // Calling `enqueue_rst` for a stream that already has a queued
4339 // (or already-flushed) RST is the dedup short-circuit — counting
4340 // those would inflate `h2.frames.tx.rst_stream` /
4341 // `h2.rst_stream.sent.*` and trip the CVE-2025-8671 MadeYouReset
4342 // lifetime cap on frames that never reached the wire.
4343 //
4344 // Account at queue-time, not at drain-time. Doing it later in
4345 // `flush_pending_control_frames` would double-count any RST that
4346 // a re-entrant call (DATA on a closed stream we already RSTed)
4347 // tried to enqueue — and missing it at queue-time leaves
4348 // `cancel_timed_out_streams` / `refuse_stream_and_discard` /
4349 // DATA-on-closed-stream paths bypassing the lifetime cap
4350 // (security review LISA-001 on commit `da845c71`).
4351 if freshly_queued {
4352 self.account_emitted_rst(error)
4353 } else {
4354 None
4355 }
4356 }
4357
4358 /// Single accounting site for proxy-emitted RST_STREAM frames.
4359 /// Three things must happen for every emitted RST so flood-protection
4360 /// stays honest: the global tx counter, the per-error breakdown,
4361 /// and the MadeYouReset emitted-RST lifetime cap.
4362 ///
4363 /// Two distinct emission paths feed this helper:
4364 /// * Queued frames — [`Self::enqueue_rst`] (and therefore every
4365 /// callable that funnels through it: `reset_stream`,
4366 /// `refuse_stream_and_discard`, `cancel_timed_out_streams`,
4367 /// DATA-on-closed-stream) calls this once at queue-time. The
4368 /// drain in `flush_pending_control_frames` does NOT call it
4369 /// again — that would double-count.
4370 /// * Converter-emitted frames — the converter's `initialize`
4371 /// chokepoint (and the HPACK over-budget abort path) writes
4372 /// RST_STREAM frames straight into `kawa.out` from inside
4373 /// `kawa.prepare`. We collect those `H2Error` codes during the
4374 /// `write_streams` loop and call this helper for each one
4375 /// after `drop(converter)` (because the converter holds
4376 /// `&mut self.encoder`).
4377 ///
4378 /// Returning `Some(MuxResult)` means the caller MUST short-circuit
4379 /// with that result — the flood detector tripped its lifetime cap
4380 /// and converted to a connection-wide GOAWAY.
4381 fn account_emitted_rst(&mut self, error: H2Error) -> Option<MuxResult> {
4382 incr!(names::h2::FRAMES_TX_RST_STREAM);
4383 count!(metric_for_rst_stream_sent(error), 1);
4384 if !matches!(error, H2Error::NoError) {
4385 if let Some(violation) = self.flood_detector.record_rst_emitted() {
4386 return Some(self.handle_flood_violation(violation));
4387 }
4388 }
4389 None
4390 }
4391
4392 /// Refuse a newly-opened stream with RST_STREAM and discard its HEADERS payload.
4393 ///
4394 /// Used when MAX_CONCURRENT_STREAMS is exceeded or buffer pool is exhausted.
4395 /// Queues the RST_STREAM for the writable path (can't write to kawa.storage
4396 /// here because it is needed to discard the HEADERS payload).
4397 ///
4398 /// Also applies SETTINGS back-pressure per RFC 9113 §5.1.2: if refusals
4399 /// burst past [`BACKPRESSURE_REFUSAL_THRESHOLD`] within
4400 /// [`BACKPRESSURE_WINDOW_DURATION`], the advertised
4401 /// `SETTINGS_MAX_CONCURRENT_STREAMS` is halved via
4402 /// [`Self::apply_mcs_backpressure`].
4403 fn refuse_stream_and_discard(
4404 &mut self,
4405 stream_id: StreamId,
4406 error: H2Error,
4407 payload_len: u32,
4408 ) -> MuxResult {
4409 if let Some(result) = self.enqueue_rst(stream_id, error) {
4410 return result;
4411 }
4412 self.state = H2State::Discard;
4413 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
4414 self.record_refusal_for_backpressure();
4415 MuxResult::Continue
4416 }
4417
4418 /// RFC 9113 §5.1.2 SETTINGS back-pressure bookkeeping.
4419 ///
4420 /// Increments the refusal counter for the current back-pressure window
4421 /// and, when the burst threshold is crossed, halves the advertised
4422 /// `SETTINGS_MAX_CONCURRENT_STREAMS`. Further halving attempts in the
4423 /// same connection are suppressed by [`Self::mcs_backpressure_applied`]
4424 /// so sustained abuse does not collapse the cap to zero — callers can
4425 /// still promote the situation to `EnhanceYourCalm` via the flood
4426 /// detector.
4427 fn record_refusal_for_backpressure(&mut self) {
4428 if self.refuse_window_start.elapsed() >= BACKPRESSURE_WINDOW_DURATION {
4429 self.refuse_count_window = 0;
4430 self.refuse_window_start = Instant::now();
4431 }
4432 self.refuse_count_window = self.refuse_count_window.saturating_add(1);
4433 if !self.mcs_backpressure_applied
4434 && self.refuse_count_window >= BACKPRESSURE_REFUSAL_THRESHOLD
4435 {
4436 self.apply_mcs_backpressure();
4437 }
4438 }
4439
4440 /// Halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` and mark the
4441 /// back-pressure state as applied. The new value takes effect locally
4442 /// immediately — subsequent stream-open checks in `handle_header_state`
4443 /// compare `self.streams.len()` against this reduced cap, so the peer
4444 /// starts receiving `REFUSED_STREAM` earlier. A full SETTINGS re-send on
4445 /// the wire is deferred until we have a mid-connection SETTINGS queue
4446 /// (the existing path in `handle_preface_state` only fires during the
4447 /// handshake); this is noted in the task log as a minimal first step.
4448 fn apply_mcs_backpressure(&mut self) {
4449 let previous = self.local_settings.settings_max_concurrent_streams;
4450 let reduced = (previous / 2).max(1);
4451 warn!(
4452 "{} H2 SETTINGS back-pressure: refusals={} in {}s — halving \
4453 SETTINGS_MAX_CONCURRENT_STREAMS {} -> {}",
4454 log_context!(self),
4455 self.refuse_count_window,
4456 BACKPRESSURE_WINDOW_DURATION.as_secs(),
4457 previous,
4458 reduced,
4459 );
4460 self.local_settings.settings_max_concurrent_streams = reduced;
4461 self.mcs_backpressure_applied = true;
4462 }
4463
4464 /// Log a flood violation with full session context and emit the GOAWAY.
4465 ///
4466 /// Centralises the "flood detected" reporting so every site that observes a
4467 /// [`H2FloodViolation`] gets the same session-scoped log line, matching the
4468 /// RUSTLS log-context convention. Also emits the per-kind statsd counter
4469 /// (`h2.flood.violation.<kind>`) so SOC dashboards can window the trip
4470 /// rate without parsing logs — every CVE-mitigation in the H2 family
4471 /// (Rapid Reset, MadeYouReset, CONTINUATION/PING/SETTINGS floods, header
4472 /// overflow, glitch) funnels through this site.
4473 pub fn handle_flood_violation(&mut self, violation: H2FloodViolation) -> MuxResult {
4474 count!(violation.metric_key, 1);
4475 warn!(
4476 "{} H2 flood detected: {} count {} exceeds threshold {}",
4477 log_context!(self),
4478 violation.reason,
4479 violation.count,
4480 violation.threshold,
4481 );
4482 self.goaway(violation.error)
4483 }
4484}
4485
4486/// Recover the [`H2Error`] code that the converter's `initialize`
4487/// chokepoint will encode into the synthesised RST_STREAM frame for a
4488/// kawa stuck in [`kawa::ParsingPhase::Error`]. Mirrors the parse +
4489/// fallback at `lib/src/protocol/mux/converter.rs::initialize` so the
4490/// flood-accounting helper sees the same code that lands on the wire.
4491fn rst_error_from_kawa<T: kawa::AsBuffer>(kawa: &kawa::Kawa<T>) -> H2Error {
4492 match kawa.parsing_phase {
4493 kawa::ParsingPhase::Error {
4494 kind: kawa::ParsingErrorKind::Processing { message },
4495 ..
4496 } => message.parse::<H2Error>().unwrap_or(H2Error::InternalError),
4497 _ => H2Error::InternalError,
4498 }
4499}
4500
/// Map a `(namespace, H2Error)` pair to a static metric key at compile time.
///
/// Each arm builds its key with `concat!`, so the result is a `&'static str`
/// literal: no heap allocation is involved and the statsd drain can store
/// the key as `&'static str`. The match has no wildcard arm, so adding a new
/// `H2Error` variant fails the build here and the metric breakdown stays in
/// lock-step with the RFC 9113 §7 error codes.
///
/// Used for the per-error-code counters emitted around GOAWAY and RST_STREAM
/// in either direction (see `metric_for_goaway_sent` and its siblings).
macro_rules! h2_error_metric_key {
    ($namespace:literal, $code:expr) => {
        match $code {
            H2Error::NoError => concat!($namespace, ".no_error"),
            H2Error::ProtocolError => concat!($namespace, ".protocol_error"),
            H2Error::InternalError => concat!($namespace, ".internal_error"),
            H2Error::FlowControlError => concat!($namespace, ".flow_control_error"),
            H2Error::SettingsTimeout => concat!($namespace, ".settings_timeout"),
            H2Error::StreamClosed => concat!($namespace, ".stream_closed"),
            H2Error::FrameSizeError => concat!($namespace, ".frame_size_error"),
            H2Error::RefusedStream => concat!($namespace, ".refused_stream"),
            H2Error::Cancel => concat!($namespace, ".cancel"),
            H2Error::CompressionError => concat!($namespace, ".compression_error"),
            H2Error::ConnectError => concat!($namespace, ".connect_error"),
            H2Error::EnhanceYourCalm => concat!($namespace, ".enhance_your_calm"),
            H2Error::InadequateSecurity => concat!($namespace, ".inadequate_security"),
            H2Error::HTTP11Required => concat!($namespace, ".http_1_1_required"),
        }
    };
}
4530
4531/// Static metric key for an outbound GOAWAY. Same call shape as the other three
4532/// helpers below — keeps the call sites uniform.
4533fn metric_for_goaway_sent(error: H2Error) -> &'static str {
4534 h2_error_metric_key!("h2.goaway.sent", error)
4535}
4536
4537/// Static metric key for an inbound GOAWAY by raw wire error code. Codes
4538/// outside RFC 9113 §7 fall into the dedicated `…unknown_error` bucket so the
4539/// breakdown stays bounded and operators can still spot non-standard peers.
4540fn metric_for_goaway_received(error_code: u32) -> &'static str {
4541 H2Error::try_from(error_code)
4542 .map(|e| h2_error_metric_key!("h2.goaway.received", e))
4543 .unwrap_or("h2.goaway.received.unknown_error")
4544}
4545
4546/// Static metric key for an outbound RST_STREAM. Mirrors
4547/// [`metric_for_goaway_sent`] under a separate namespace so RST and GOAWAY
4548/// rates can be alerted on independently.
4549fn metric_for_rst_stream_sent(error: H2Error) -> &'static str {
4550 h2_error_metric_key!("h2.rst_stream.sent", error)
4551}
4552
4553/// Static metric key for an inbound RST_STREAM by raw wire error code. Same
4554/// `…unknown_error` fallback as [`metric_for_goaway_received`].
4555fn metric_for_rst_stream_received(error_code: u32) -> &'static str {
4556 H2Error::try_from(error_code)
4557 .map(|e| h2_error_metric_key!("h2.rst_stream.received", e))
4558 .unwrap_or("h2.rst_stream.received.unknown_error")
4559}
4560
4561/// Static metric key for an inbound H2 frame by RFC 9113 §6 frame type.
4562/// Emitted at the `handle_frame` dispatch — single chokepoint that any
4563/// new H2 frame type must traverse, so adding a `Frame::*` variant fails
4564/// the build here. Counts are per-frame, not per-byte; pair with
4565/// `bytes_in` for traffic-mix dashboards.
4566fn h2_frame_rx_metric_key(frame: &Frame) -> &'static str {
4567 match frame {
4568 Frame::Data(_) => "h2.frames.rx.data",
4569 Frame::Headers(_) => "h2.frames.rx.headers",
4570 Frame::PushPromise(_) => "h2.frames.rx.push_promise",
4571 Frame::Priority(_) => "h2.frames.rx.priority",
4572 Frame::RstStream(_) => "h2.frames.rx.rst_stream",
4573 Frame::Settings(_) => "h2.frames.rx.settings",
4574 Frame::Ping(_) => "h2.frames.rx.ping",
4575 Frame::GoAway(_) => "h2.frames.rx.goaway",
4576 Frame::WindowUpdate(_) => "h2.frames.rx.window_update",
4577 Frame::Continuation(_) => "h2.frames.rx.continuation",
4578 Frame::PriorityUpdate(_) => "h2.frames.rx.priority_update",
4579 Frame::Unknown(_) => "h2.frames.rx.unknown",
4580 }
4581}
4582
4583impl<Front: SocketHandler> ConnectionH2<Front> {
4584 pub fn goaway(&mut self, error: H2Error) -> MuxResult {
4585 self.state = H2State::Error;
4586 self.drain.draining = true;
4587 self.expect_read = None;
4588 // Disarm the SETTINGS ACK timer: once we've committed to GOAWAY, the
4589 // timeout check at `readable()` / `flush_pending_control_frames()` must
4590 // not re-fire. Without this, `signal_pending_write()` below re-enters
4591 // `writable()` → `flush_pending_control_frames()` on the next tick,
4592 // the elapsed check is still true, and we emit another
4593 // `warn!` + `goaway()` pair, each bumping `h2.goaway.sent.*`.
4594 self.settings_sent_at = None;
4595 let kawa = &mut self.zero;
4596 kawa.storage.clear();
4597 // Severity tiering: only `InternalError` implies a sozu-side bug when
4598 // WE emit it. Every other non-`NoError` reason is "peer misbehaved,
4599 // sozu defended correctly" — operators don't need paging on abusive
4600 // or buggy peers. Caller sites already log the specific antecedent
4601 // (flood detected, parser failure, SETTINGS timeout, invalid window)
4602 // before reaching `goaway()`, so demoting this summary line avoids
4603 // duplicate noise without hiding the root cause.
4604 match error {
4605 H2Error::NoError => debug!("{} GOAWAY: {:?}", log_context!(self), error),
4606 H2Error::InternalError => error!("{} GOAWAY: {:?}", log_context!(self), error),
4607 _ => warn!("{} GOAWAY: {:?}", log_context!(self), error),
4608 }
4609 count!(metric_for_goaway_sent(error), 1);
4610
4611 // RFC 9113 §6.8: last_stream_id is the highest peer-initiated stream we processed
4612 match serializer::gen_goaway(kawa.storage.space(), self.highest_peer_stream_id, error) {
4613 Ok((_, size)) => {
4614 kawa.storage.fill(size);
4615 incr!(names::h2::FRAMES_TX_GOAWAY);
4616 self.state = H2State::GoAway;
4617 self.expect_write = Some(H2StreamId::Zero);
4618 self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
4619 self.readiness.signal_pending_write();
4620 MuxResult::Continue
4621 }
4622 Err(error) => {
4623 error!(
4624 "{} Could not serialize GoAwayFrame: {:?}",
4625 log_context!(self),
4626 error
4627 );
4628 self.force_disconnect()
4629 }
4630 }
4631 }
4632
4633 /// RFC 9113 §6.8: Initiate graceful shutdown using the double-GOAWAY pattern.
4634 ///
4635 /// First call sends GOAWAY with `last_stream_id = 0x7FFFFFFF` (MAX) to signal
4636 /// the intent to stop accepting new streams while allowing in-flight streams
4637 /// to complete. The connection enters draining mode.
4638 ///
4639 /// When `draining` is already true (second invocation), sends the final GOAWAY
4640 /// with the actual `highest_peer_stream_id` so the peer knows which streams
4641 /// were processed.
4642 pub fn graceful_goaway(&mut self) -> MuxResult {
4643 if self.drain.draining {
4644 // Second GOAWAY: send with the real last_stream_id
4645 return self.goaway(H2Error::NoError);
4646 }
4647
4648 // First GOAWAY: advertise MAX stream ID so the peer knows we are draining
4649 // but does not yet know the cutoff. This gives in-flight requests a chance
4650 // to arrive before we commit to a final last_stream_id.
4651 self.drain.draining = true;
4652 // Arm the forced-close timer from the moment the proxy decides to drain.
4653 // `Mux::shutting_down` samples it against `graceful_shutdown_deadline`
4654 // and returns `true` once the budget is exhausted so the session loop
4655 // tears the connection down instead of waiting forever.
4656 self.drain.started_at = Some(Instant::now());
4657 // Keep expect_read as-is: existing streams should continue reading
4658 // data during the drain window opened by the initial GOAWAY. Only
4659 // the final GOAWAY (via `goaway()`) removes READABLE.
4660 let kawa = &mut self.zero;
4661 kawa.storage.clear();
4662 debug!(
4663 "{} GOAWAY (graceful, initial): last_stream_id=0x7FFFFFFF",
4664 log_context!(self)
4665 );
4666 // The initial GOAWAY sends NO_ERROR on the wire — count it under
4667 // the same per-code key as the final GOAWAY. The downstream alert
4668 // that wants to distinguish drain from termination compares
4669 // against the `h2.goaway.sent.no_error` rate (drain) vs the other
4670 // variants (termination on error).
4671 count!(metric_for_goaway_sent(H2Error::NoError), 1);
4672
4673 match serializer::gen_goaway(kawa.storage.space(), STREAM_ID_MAX, H2Error::NoError) {
4674 Ok((_, size)) => {
4675 kawa.storage.fill(size);
4676 incr!(names::h2::FRAMES_TX_GOAWAY);
4677 // Stay in the current state so the connection can continue processing
4678 // existing streams. The final GOAWAY will transition to GoAway state.
4679 // Keep READABLE so in-flight request bodies can still be received
4680 // during the drain window. Only remove READABLE in the final GOAWAY
4681 // (via `goaway()`).
4682 self.expect_write = Some(H2StreamId::Zero);
4683 self.readiness.arm_writable();
4684 MuxResult::Continue
4685 }
4686 Err(error) => {
4687 error!(
4688 "{} Could not serialize graceful GoAwayFrame: {:?}",
4689 log_context!(self),
4690 error
4691 );
4692 self.force_disconnect()
4693 }
4694 }
4695 }
4696
4697 /// Returns `true` when the graceful-shutdown budget armed by
4698 /// [`Self::graceful_goaway`] has elapsed. A return of `true` signals
4699 /// the enclosing session loop that the proxy-initiated drain must
4700 /// transition to a forced close: remaining streams will not complete
4701 /// in time and keeping the connection open past the deadline defeats
4702 /// the soft-stop SLA.
4703 ///
4704 /// Returns `false` when:
4705 /// - drain has not started yet (`started_at` is `None`),
4706 /// - the knob is `0` / `None` (indefinite wait explicitly opted in),
4707 /// - or the elapsed time is still within the configured budget.
4708 pub fn graceful_shutdown_deadline_elapsed(&self) -> bool {
4709 match (self.drain.started_at, self.drain.graceful_shutdown_deadline) {
4710 (Some(started_at), Some(deadline)) => started_at.elapsed() >= deadline,
4711 _ => false,
4712 }
4713 }
4714
4715 /// Returns `true` if there is data queued waiting to be flushed:
4716 /// - H2 control frames in the zero buffer (GOAWAY, SETTINGS ACK, etc.)
4717 /// - A partially-written stream or control frame (`expect_write`)
4718 /// - Encrypted TLS records in rustls's output buffer not yet flushed to TCP
4719 ///
4720 /// The TLS check is critical: `shutting_down()` uses this to prevent
4721 /// premature session close while response DATA is still in rustls's
4722 /// buffer (accepted by `socket_write_vectored` but not yet on the wire).
4723 ///
4724 /// Does NOT check per-stream `back.out`/`back.blocks`; use
4725 /// [`Self::has_pending_write_full`] on paths that must honour
4726 /// LIFECYCLE invariant 16 (e.g. shutdown-drain).
4727 pub fn has_pending_write(&self) -> bool {
4728 if self.peer_gone_after_final_goaway() {
4729 return false;
4730 }
4731 self.expect_write.is_some()
4732 || !self.zero.storage.is_empty()
4733 || self.socket.socket_wants_write()
4734 }
4735
4736 /// True when the reaper has queued control frames (`RST_STREAM`) into
4737 /// `pending_rst_streams` that have not yet been serialized. Kept SEPARATE
4738 /// from [`Self::has_pending_write`] because that probe gates connection close
4739 /// (the `mod.rs` close-gating sites) and must NOT treat a queued RST as a
4740 /// reason to keep the connection open; this probe is consulted ONLY by the
4741 /// `MuxState::timeout` flush gate to push a silent-peer `RST_STREAM(CANCEL)`
4742 /// onto the wire before the connection closes.
4743 pub fn has_pending_control_write(&self) -> bool {
4744 !self.pending_rst_streams.is_empty()
4745 }
4746
4747 /// Connection-level [`Self::has_pending_write`] extended with a per-stream
4748 /// back-buffer probe (LIFECYCLE §9 invariant 16). Used by shutdown-drain
4749 /// paths that must not close while any open stream still has outbound
4750 /// kawa bytes queued — a voluntary scheduler yield can leave `back.out`
4751 /// or `back.blocks` non-empty without `expect_write` being set.
4752 pub fn has_pending_write_full<L>(&self, context: &Context<L>) -> bool
4753 where
4754 L: ListenerHandler + L7ListenerHandler,
4755 {
4756 self.has_pending_write() || any_stream_has_pending_back(&self.streams, &context.streams)
4757 }
4758
4759 /// Flush the zero buffer to the socket, counting bytes as connection overhead.
4760 ///
4761 /// Returns `true` if the socket stalled (WouldBlock / zero-length write),
4762 /// meaning the caller should stop writing and wait for the next writable event.
4763 /// Returns `false` when the buffer has been fully drained.
4764 fn flush_zero_to_socket(&mut self) -> bool {
4765 while !self.zero.storage.is_empty() {
4766 let (size, status) = self.socket.socket_write(self.zero.storage.data());
4767 #[cfg(debug_assertions)]
4768 trace!(
4769 "{} flush_zero_to_socket: written={}, status={:?}, wants_write={}",
4770 log_context!(self),
4771 size,
4772 status,
4773 self.socket.socket_wants_write()
4774 );
4775 self.zero.storage.consume(size);
4776 self.position.count_bytes_out_counter(size);
4777 self.bytes.overhead_bout += size;
4778 if update_readiness_after_write(size, status, &mut self.readiness) {
4779 return true;
4780 }
4781 }
4782 // Reset buffer positions after draining. consume() advances start but
4783 // never resets it, so without clear() the next fill would panic.
4784 self.zero.storage.clear();
4785 false
4786 }
4787
4788 /// Directly flush the zero buffer to the socket without going through
4789 /// the full writable() path. Used during shutdown when the event loop
4790 /// won't deliver new epoll events for this session (edge-triggered).
4791 pub fn flush_zero_buffer(&mut self) {
4792 if self.flush_zero_to_socket() {
4793 return;
4794 }
4795 self.expect_write = None;
4796 if self.socket.socket_wants_write() {
4797 let (_size, status) = self.socket.socket_write(&[]);
4798 let _ = update_readiness_after_write(0, status, &mut self.readiness);
4799 }
4800 }
4801
4802 pub fn create_stream<L>(
4803 &mut self,
4804 stream_id: StreamId,
4805 context: &mut Context<L>,
4806 ) -> Option<GlobalStreamId>
4807 where
4808 L: ListenerHandler + L7ListenerHandler,
4809 {
4810 // RFC 9113 §6.8: reject new streams on a draining connection
4811 if self.drain.draining {
4812 error!(
4813 "{} Rejecting new stream {} on draining connection",
4814 log_context!(self),
4815 stream_id
4816 );
4817 return None;
4818 }
4819 let highest_before = self.highest_peer_stream_id;
4820 let streams_before = self.streams.len();
4821 // Track the highest peer-initiated stream ID for GoAway frames
4822 // before any early return, so GoAway always reports the correct last stream.
4823 if stream_id > self.highest_peer_stream_id {
4824 self.highest_peer_stream_id = stream_id;
4825 }
4826 // highest_peer_stream_id is monotonic non-decreasing — it only ever
4827 // climbs to the largest id we have accepted (RFC 9113 §6.8 last-stream
4828 // reporting depends on this).
4829 debug_assert!(
4830 self.highest_peer_stream_id >= highest_before,
4831 "highest_peer_stream_id must never regress"
4832 );
4833 let global_stream_id = context.create_stream(
4834 Ulid::generate(),
4835 self.peer_settings.settings_initial_window_size,
4836 )?;
4837 self.last_stream_id = (stream_id + 2) & !1;
4838 self.streams.insert(stream_id, global_stream_id);
4839 self.stream_last_activity_at
4840 .insert(stream_id, Instant::now());
4841 // Post-conditions: the stream is now reachable in both indices, the
4842 // active count grew by exactly one (the id was not already present —
4843 // `handle_header_state` rejects re-used ids), and `last_stream_id` is
4844 // the even watermark just past this id so `new_stream_id` never collides.
4845 debug_assert_eq!(
4846 self.streams.get(&stream_id).copied(),
4847 Some(global_stream_id),
4848 "create_stream must register the wire->global mapping"
4849 );
4850 debug_assert!(
4851 self.stream_last_activity_at.contains_key(&stream_id),
4852 "create_stream must arm the per-stream idle timer"
4853 );
4854 debug_assert_eq!(
4855 self.streams.len(),
4856 streams_before + 1,
4857 "create_stream must add exactly one stream (id must not pre-exist)"
4858 );
4859 debug_assert!(
4860 self.last_stream_id > stream_id && self.last_stream_id & 1 == 0,
4861 "last_stream_id watermark must be the even value strictly above stream_id"
4862 );
4863 Some(global_stream_id)
4864 }
4865
4866 pub fn new_stream_id(&mut self) -> Option<StreamId> {
4867 let watermark_before = self.last_stream_id;
4868 let (issued, next) = next_stream_id(self.last_stream_id, self.position.is_client())?;
4869 self.last_stream_id = next;
4870 // Post-conditions: the locally-issued id has the parity of our role and
4871 // the watermark advanced strictly (so the next allocation cannot reuse
4872 // this id). `next_stream_id` already asserts parity vs `is_client`; here
4873 // we re-assert against `self.position` and the watermark monotonicity.
4874 debug_assert_eq!(
4875 issued & 1 == 1,
4876 self.position.is_client(),
4877 "locally-issued stream id parity must match our role"
4878 );
4879 debug_assert!(
4880 self.last_stream_id > watermark_before,
4881 "issuing a stream id must advance the watermark"
4882 );
4883 Some(issued)
4884 }
4885
4886 /// Test-only setter: jump `last_stream_id` close to [`STREAM_ID_MAX`] so
4887 /// that the next call to [`Self::new_stream_id`] exhausts the 31-bit
4888 /// space. FIX-22 ("Stream-ID exhaustion disconnects backend gracefully")
4889 /// exercises the `None`-return branch — reaching it through normal API
4890 /// usage would require issuing ~2³¹ requests, which is not tractable in
4891 /// an E2E harness.
4892 #[cfg(any(test, feature = "e2e-hooks"))]
4893 pub fn __test_set_last_stream_id(&mut self, id: StreamId) {
4894 self.last_stream_id = id;
4895 }
4896
4897 /// Cross-field invariant sweep for the H2 connection state machine,
4898 /// asserted as a run-to-completion post-condition at the end of every
4899 /// frame-handling pass (see the call in [`Self::handle_frame`]).
4900 ///
4901 /// These are relationships between *separate* fields that no single setter
4902 /// can guarantee on its own — exactly the class of bug TigerStyle's
4903 /// `check_invariants` targets. Each one is cheap (counter compares + a few
4904 /// `HashMap` membership probes); the whole function is `#[cfg(debug_assertions)]`
4905 /// and compiles out of release entirely.
4906 ///
4907 /// Encoded invariants:
4908 /// 1. **Stream-id watermark parity**: locally-issued ids never exceed
4909 /// `STREAM_ID_MAX`; `last_stream_id` stays the even watermark (it is
4910 /// rounded to `(id + 2) & !1` and initialised to 0).
4911 /// 2. **Per-stream caches are subsets of the live stream set**:
4912 /// `stream_last_activity_at` is keyed only by currently-tracked stream
4913 /// ids — a leak here would let a removed stream keep an idle timer and
4914 /// mis-fire `cancel_timed_out_streams`. (`rst_sent` is intentionally NOT
4915 /// a subset: a queued RST for an already-removed stream is legal.)
4916 /// 3. **RST queue accounting**: the never-decaying `total_rst_streams_queued`
4917 /// lifetime counter is always `>=` the currently-pending queue length
4918 /// (CVE-2025-8671 MadeYouReset cap relies on the lifetime counter never
4919 /// under-counting), and the pending queue stays within its hard cap +1
4920 /// (the escalation tripwire fires at the cap).
4921 /// 4. **Pending WINDOW_UPDATE bound**: the coalescing map never exceeds the
4922 /// per-connection cap derived from `max_concurrent_streams`.
4923 /// 5. **Drain/state coupling**: a terminal `GoAway`/`Error` state implies the
4924 /// connection is draining (`goaway()` sets both); the converse need not
4925 /// hold (graceful drain stays in a live state).
4926 #[cfg(debug_assertions)]
4927 fn check_invariants<L>(&self, context: &Context<L>)
4928 where
4929 L: ListenerHandler + L7ListenerHandler,
4930 {
4931 // (1) Watermark parity and bound.
4932 debug_assert!(
4933 self.last_stream_id & 1 == 0,
4934 "last_stream_id must stay an even watermark, got {}",
4935 self.last_stream_id
4936 );
4937
4938 // (2) Per-stream caches are subsets of the live stream set, and every
4939 // mapping points at a valid context slot.
4940 debug_assert!(
4941 self.stream_last_activity_at
4942 .keys()
4943 .all(|id| self.streams.contains_key(id)),
4944 "stream_last_activity_at must only track currently-open stream ids"
4945 );
4946 debug_assert!(
4947 self.streams
4948 .values()
4949 .all(|&gid| gid < context.streams.len()),
4950 "every stream mapping must point at a valid context slot"
4951 );
4952
4953 // (3) RST queue accounting.
4954 debug_assert!(
4955 self.total_rst_streams_queued >= self.pending_rst_streams.len(),
4956 "queued-RST lifetime counter ({}) must be >= currently-pending queue ({})",
4957 self.total_rst_streams_queued,
4958 self.pending_rst_streams.len()
4959 );
4960 debug_assert!(
4961 self.pending_rst_streams.len() <= MAX_PENDING_RST_STREAMS + 1,
4962 "pending RST queue must stay within its hard cap (escalates at the cap)"
4963 );
4964
4965 // (4) Pending WINDOW_UPDATE coalescing map bound.
4966 debug_assert!(
4967 self.flow_control.pending_window_updates.len() <= self.max_pending_window_updates,
4968 "pending WINDOW_UPDATE map must stay within its per-connection cap"
4969 );
4970
4971 // (5) Drain/state coupling: terminal states imply draining.
4972 debug_assert!(
4973 !matches!(self.state, H2State::GoAway | H2State::Error) || self.drain.draining,
4974 "GoAway/Error state must imply the connection is draining"
4975 );
4976 }
4977
4978 fn handle_frame<E, L>(
4979 &mut self,
4980 frame: Frame,
4981 wire_payload_len: u32,
4982 context: &mut Context<L>,
4983 endpoint: E,
4984 ) -> MuxResult
4985 where
4986 E: Endpoint,
4987 L: ListenerHandler + L7ListenerHandler,
4988 {
4989 trace!("{} {:#?}", log_context!(self), frame);
4990 // Per-frame-type RX counter. Single chokepoint covers every H2 frame
4991 // type — adding a new `Frame::*` variant fails the build inside the
4992 // helper, keeping the metric breakdown in lock-step with RFC 9113 §6.
4993 count!(h2_frame_rx_metric_key(&frame), 1);
4994 let result = match frame {
4995 Frame::Data(data) => self.handle_data_frame(data, wire_payload_len, context, endpoint),
4996 Frame::Headers(headers) => self.handle_headers_frame(headers, context, endpoint),
4997 Frame::PushPromise(_) => self.handle_push_promise_frame(),
4998 Frame::Priority(priority) => self.handle_priority_frame(priority, context, endpoint),
4999 Frame::RstStream(rst_stream) => {
5000 self.handle_rst_stream_frame(rst_stream, context, endpoint)
5001 }
5002 Frame::Settings(settings) => self.handle_settings_frame(settings, context),
5003 Frame::Ping(ping) => self.handle_ping_frame(ping),
5004 Frame::GoAway(goaway) => self.handle_goaway_frame(goaway, context, endpoint),
5005 Frame::WindowUpdate(wu) => self.handle_window_update_frame(wu, context, endpoint),
5006 Frame::PriorityUpdate(pu) => self.handle_priority_update_frame(pu),
5007 Frame::Continuation(_) => {
5008 // Unreachable: standalone CONTINUATION is rejected in
5009 // `handle_header_state` (RFC 9113 §6.10) and in-block
5010 // CONTINUATION is consumed by the inline header-parsing
5011 // path. Keep a defensive fallback that returns
5012 // PROTOCOL_ERROR rather than panicking in debug builds.
5013 self.attribute_bytes_to_overhead();
5014 warn!(
5015 "{} CONTINUATION frames are handled inline during header parsing",
5016 log_context!(self)
5017 );
5018 self.goaway(H2Error::ProtocolError)
5019 }
5020 // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
5021 // The parser already consumed the payload; attribute the bytes
5022 // to connection-level overhead and continue.
5023 Frame::Unknown(raw) => {
5024 debug!(
5025 "{} Ignoring unknown H2 frame type {}",
5026 log_context!(self),
5027 raw
5028 );
5029 self.attribute_bytes_to_overhead();
5030 MuxResult::Continue
5031 }
5032 };
5033 // Run-to-completion post-condition: the connection-level cross-field
5034 // invariants must hold after every frame is dispatched, on success and
5035 // on the protocol-error paths alike.
5036 #[cfg(debug_assertions)]
5037 self.check_invariants(context);
5038 result
5039 }
5040
5041 /// RFC 9110 §8.6: Content-Length validation must be skipped for responses
5042 /// where the body is absent by definition:
5043 /// - Responses to HEAD requests (any status)
5044 /// - 1xx informational responses
5045 /// - 204 No Content
5046 /// - 304 Not Modified
5047 fn content_length_exempt(
5048 &self,
5049 context: &crate::protocol::kawa_h1::editor::HttpContext,
5050 ) -> bool {
5051 use crate::protocol::kawa_h1::parser::Method;
5052 // HEAD method responses (only relevant when reading backend responses)
5053 if self.position.is_client() && context.method == Some(Method::Head) {
5054 return true;
5055 }
5056 // 1xx, 204, 304 status codes
5057 if let Some(status) = context.status {
5058 if (100..200).contains(&status) || status == 204 || status == 304 {
5059 return true;
5060 }
5061 }
5062 false
5063 }
5064
    /// Process one inbound DATA frame.
    ///
    /// `data` carries the stream id, the unpadded payload slice and the
    /// END_STREAM flag. `wire_payload_len` is the full on-the-wire payload
    /// length (pad-length byte and padding included) and is the amount
    /// credited to the flow-control windows.
    ///
    /// Steps, in order:
    /// 1. Empty DATA frames without END_STREAM feed the flood detector.
    /// 2. DATA for a stream this connection no longer tracks is only
    ///    accounted for connection-level flow control, then dropped.
    /// 3. Connection-level flow control is credited before any early return.
    /// 4. Unless the message is exempt, the running total is checked against
    ///    a declared Content-Length; exceeding it resets the stream with
    ///    `PROTOCOL_ERROR`.
    /// 5. Stream-level WINDOW_UPDATE is queued (unless END_STREAM), WRITABLE
    ///    is armed when updates are pending, and the per-stream idle timer is
    ///    refreshed for non-empty payloads.
    /// 6. For an `Unlinked` stream the payload is discarded; otherwise the
    ///    payload is pushed as kawa blocks (with chunk framing when the body
    ///    is chunked), END_STREAM finalises the body after an exact
    ///    Content-Length check, and a `Linked` peer endpoint is re-armed for
    ///    writing.
    ///
    /// Returns [`MuxResult::Continue`] on the normal paths, or whatever
    /// `reset_stream` returns on a Content-Length violation. The flood check
    /// macro may also return early.
    fn handle_data_frame<E, L>(
        &mut self,
        data: parser::Data,
        wire_payload_len: u32,
        context: &mut Context<L>,
        mut endpoint: E,
    ) -> MuxResult
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        // CVE-2019-9518: track empty DATA frames (no payload, no END_STREAM)
        if data.payload.is_empty() && !data.end_stream {
            let empty_before = self.flood_detector.empty_data_count;
            self.flood_detector.empty_data_count += 1;
            debug_assert_eq!(
                self.flood_detector.empty_data_count,
                empty_before + 1,
                "empty-DATA flood counter must advance by exactly one per empty frame"
            );
            check_flood_or_return!(self);
        }
        let Some(global_stream_id) = self.streams.get(&data.stream_id).copied() else {
            // The stream was terminated while data was expected,
            // probably due to automatic answer for invalid/unauthorized access.
            // RFC 9113 §6.9: we MUST still account for the DATA payload in
            // connection-level flow control using the full wire length
            // (including pad-length byte and padding), otherwise the window
            // shrinks permanently and eventually stalls the connection.
            self.flow_control.received_bytes_since_update += wire_payload_len;
            let conn_threshold = self.connection_config.initial_connection_window / 2;
            if self.flow_control.received_bytes_since_update >= conn_threshold {
                let increment = self.flow_control.received_bytes_since_update;
                self.queue_window_update(0, increment);
                self.flow_control.received_bytes_since_update = 0;
                self.readiness.arm_writable();
            }
            self.attribute_bytes_to_overhead();
            return MuxResult::Continue;
        };
        let mut slice = data.payload;
        let stream = &mut context.streams[global_stream_id];
        // Unpadded application payload size — what is forwarded to the backend
        // and counted against Content-Length.
        let content_len = slice.len();
        // Full wire-payload size (includes pad-length byte and padding).
        // RFC 9113 §5.2: padding counts against flow-control windows.
        let wire_len = wire_payload_len as usize;
        let cl_exempt = self.content_length_exempt(&stream.context);

        // Extract declared content-length and update position-aware data counter
        let (data_received, declared_length) = {
            let parts = stream.split(&self.position);
            *parts.data_received += content_len;
            let total = *parts.data_received;
            let declared = match parts.rbuffer.body_size {
                kawa::BodySize::Length(n) => Some(n),
                _ => None,
            };
            (total, declared)
        };

        // RFC 9113 §6.9 + §5.2: credit connection-level flow control BEFORE any
        // early-return path. Malformed DATA still consumed the peer's send
        // window; without crediting it back, repeated bad streams permanently
        // shrink the connection window and stall unrelated streams that share
        // the same H2 connection. Stream-level credit can stay below — once we
        // RST the violating stream, its per-stream window is moot per
        // RFC 9113 §6.9 (the receiver discards further frames for the stream).
        let conn_threshold = self.connection_config.initial_connection_window / 2;
        self.flow_control.received_bytes_since_update += wire_payload_len;
        if self.flow_control.received_bytes_since_update >= conn_threshold {
            let increment = self.flow_control.received_bytes_since_update;
            self.queue_window_update(0, increment);
            self.flow_control.received_bytes_since_update = 0;
        }

        // RFC 9113 §8.1.1: if Content-Length is present, total DATA payload
        // must not exceed the declared length (check on every frame).
        // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses (body absent by definition).
        if !cl_exempt {
            if let Some(expected) = declared_length {
                if data_received > expected {
                    error!(
                        "{} Content-Length mismatch: received {} > declared {}",
                        log_context!(self),
                        data_received,
                        expected
                    );
                    // Pair WRITABLE arming with the queued connection-level
                    // WINDOW_UPDATE before returning; otherwise the credit sits
                    // until the next inbound frame on this connection.
                    if !self.flow_control.pending_window_updates.is_empty() {
                        self.readiness.arm_writable();
                    }
                    let result = self.reset_stream(
                        data.stream_id,
                        global_stream_id,
                        context,
                        endpoint,
                        H2Error::ProtocolError,
                    );
                    self.remove_dead_stream(data.stream_id, global_stream_id);
                    return result;
                }
            }
        }

        let stream = &mut context.streams[global_stream_id];
        self.attribute_bytes_to_stream(&mut stream.metrics);
        let stream_state = stream.state;
        let is_unlinked = matches!(stream_state, StreamState::Unlinked);
        let parts = stream.split(&self.position);
        let kawa = parts.rbuffer;
        self.position.count_bytes_in(parts.metrics, content_len);

        // Stream-level flow control (only if stream is still open).
        // Connection-level credit was already applied above the CL check so
        // malformed DATA cannot starve the connection window for other streams.
        if !data.end_stream {
            self.queue_window_update(data.stream_id, wire_payload_len);
        }

        // If we have pending updates, ensure we get a writable event.
        // Registering WRITABLE interest alone is not enough: under
        // edge-triggered epoll the WRITABLE event bit may have been consumed
        // by a previous write cycle. Without the event bit set, filter_interest()
        // returns 0 and the WINDOW_UPDATEs never get flushed, stalling the client.
        // NOTE(review): this comment previously named `signal_pending_write()`,
        // but the call below is `arm_writable()`. Presumably `arm_writable()`
        // both registers interest and sets the event bit — confirm against
        // the `Readiness` implementation.
        if !self.flow_control.pending_window_updates.is_empty() {
            self.readiness.arm_writable();
        }

        // Refresh per-stream idle timer on non-empty DATA.
        // Empty DATA frames (CVE-2019-9518 vector) must NOT reset the timer,
        // otherwise an attacker can keep a stream alive indefinitely with
        // zero-length frames while pinning a MAX_CONCURRENT_STREAMS slot.
        if content_len > 0 {
            if let Some(t) = self.stream_last_activity_at.get_mut(&data.stream_id) {
                *t = Instant::now();
            }
        }

        if is_unlinked {
            // Backend is gone but client is still sending DATA.
            // Discard the data (flow control updates were already
            // queued above) to prevent the buffer from filling up.
            kawa.storage.clear();
            if data.end_stream {
                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
                self.mark_end_of_stream(stream);
            }
        } else {
            // Rebase the payload slice onto the current storage head, then
            // advance storage.head by the full wire payload length so the
            // next frame doesn't read stale pad-length+padding bytes.
            slice.start = slice.start.saturating_add(kawa.storage.head as u32);
            kawa.storage.head += wire_len;

            // Emit chunk framing for chunked transfer encoding (H2→H1 path).
            // H2 converter ignores ChunkHeader and end_chunk Flags, so this is safe for H2→H2.
            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
                // Chunk size rendered as lowercase hexadecimal.
                let hex_len = {
                    let mut buf = Vec::with_capacity(16);
                    let _ = write!(buf, "{content_len:x}");
                    buf
                };
                kawa.push_block(kawa::Block::ChunkHeader(kawa::ChunkHeader {
                    length: kawa::Store::from_vec(hex_len),
                }));
            }

            kawa.push_block(kawa::Block::Chunk(kawa::Chunk {
                data: kawa::Store::Slice(slice),
            }));

            // Close the chunk opened above (chunked bodies only).
            if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
                kawa.push_block(kawa::Block::Flags(kawa::Flags {
                    end_body: false,
                    end_chunk: true,
                    end_header: false,
                    end_stream: false,
                }));
            }

            if data.end_stream {
                // RFC 9113 §8.1.1: on end_stream, total DATA must equal Content-Length.
                // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
                if !cl_exempt {
                    if let Some(expected) = declared_length {
                        if data_received != expected {
                            error!(
                                "{} Content-Length mismatch: received {} != declared {}",
                                log_context!(self),
                                data_received,
                                expected
                            );
                            let result = self.reset_stream(
                                data.stream_id,
                                global_stream_id,
                                context,
                                endpoint,
                                H2Error::ProtocolError,
                            );
                            self.remove_dead_stream(data.stream_id, global_stream_id);
                            return result;
                        }
                    }
                }
                let is_chunked = kawa.body_size == kawa::BodySize::Chunked;
                kawa.push_block(kawa::Block::Flags(kawa::Flags {
                    end_body: true,
                    end_chunk: is_chunked,
                    end_header: false,
                    end_stream: true,
                }));
                kawa.parsing_phase = kawa::ParsingPhase::Terminated;
                self.mark_end_of_stream(stream);
            }
            if let StreamState::Linked(token) = stream_state {
                // Mirror of h1.rs:361-368 for the H2-backend → H2-frontend
                // path: edge-triggered epoll will NOT re-fire for bytes we
                // just pushed into stream.back; the synthetic event is the
                // only wake path. LIFECYCLE invariant 15.
                endpoint.readiness_mut(token).arm_writable();
                incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_DATA);
            }
        }
        MuxResult::Continue
    }
5293
5294 fn handle_headers_frame<E, L>(
5295 &mut self,
5296 headers: Headers,
5297 context: &mut Context<L>,
5298 mut endpoint: E,
5299 ) -> MuxResult
5300 where
5301 E: Endpoint,
5302 L: ListenerHandler + L7ListenerHandler,
5303 {
5304 // HEADERS frames represent real application activity (new request
5305 // or response). Reset the timeout since the peer is actively
5306 // communicating, unlike control frames (PING, WINDOW_UPDATE).
5307 self.timeout_container.reset();
5308 if !headers.end_headers {
5309 // CVE-2024-27316: only initialize tracking on the very first HEADERS
5310 // fragment, not on re-entries from ContinuationFrame (which call
5311 // handle_frame(Frame::Headers) with the accumulated header block).
5312 if self.flood_detector.continuation_count == 0 {
5313 self.flood_detector.accumulated_header_size = headers.header_block_fragment.len;
5314 }
5315 debug!(
5316 "{} FRAGMENT: stream_id={}, len={}",
5317 log_context!(self),
5318 headers.stream_id,
5319 self.zero.storage.data().len()
5320 );
5321 self.state = H2State::ContinuationHeader(headers);
5322 return MuxResult::Continue;
5323 }
5324 // Header block is complete — reset CONTINUATION counters
5325 self.flood_detector.reset_continuation();
5326 // can this fail?
5327 let stream_id = headers.stream_id;
5328 let Some(global_stream_id) = self.streams.get(&stream_id).copied() else {
5329 error!(
5330 "{} Handling Headers frame with no attached stream {:#?}",
5331 log_context!(self),
5332 self
5333 );
5334 incr!(names::h2::HEADERS_NO_STREAM_ERROR);
5335 self.attribute_bytes_to_overhead();
5336 return self.force_disconnect();
5337 };
5338
5339 // Refresh per-stream idle timer on HEADERS (response headers or trailers
5340 // on an existing stream). Initial HEADERS that create the stream already
5341 // set the timestamp in create_stream().
5342 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
5343 *t = Instant::now();
5344 }
5345
5346 if let Some(priority) = &headers.priority {
5347 if self.prioriser.push_priority(stream_id, priority.clone()) {
5348 self.reset_stream(
5349 stream_id,
5350 global_stream_id,
5351 context,
5352 endpoint,
5353 H2Error::ProtocolError,
5354 );
5355 self.remove_dead_stream(stream_id, global_stream_id);
5356 return MuxResult::Continue;
5357 }
5358 }
5359
5360 let stream = &mut context.streams[global_stream_id];
5361 self.attribute_bytes_to_stream(&mut stream.metrics);
5362 let kawa = &mut self.zero;
5363 let buffer = headers.header_block_fragment.data(kawa.storage.buffer());
5364 let stream = &mut context.streams[global_stream_id];
5365 let parts = &mut stream.split(&self.position);
5366 let was_initial = parts.rbuffer.is_initial();
5367 let elide_x_real_ip = parts.context.elide_x_real_ip;
5368 let status = pkawa::handle_header(
5369 &mut self.decoder,
5370 &mut self.prioriser,
5371 stream_id,
5372 parts.rbuffer,
5373 buffer,
5374 headers.end_stream,
5375 parts.context,
5376 self.flood_detector.config.max_header_list_size,
5377 self.flood_detector.config.max_header_fields,
5378 elide_x_real_ip,
5379 );
5380 kawa.storage.clear();
5381 if let Err((error, global)) = status {
5382 match self.position {
5383 Position::Client(..) => incr!(names::http::BACKEND_PARSE_ERRORS),
5384 Position::Server => incr!(names::http::FRONTEND_PARSE_ERRORS),
5385 }
5386 if global {
5387 error!(
5388 "{} GOT GLOBAL ERROR WHILE PROCESSING HEADERS",
5389 log_context!(self)
5390 );
5391 return self.goaway(error);
5392 } else {
5393 let result =
5394 self.reset_stream(stream_id, global_stream_id, context, endpoint, error);
5395 self.remove_dead_stream(stream_id, global_stream_id);
5396 return result;
5397 }
5398 }
5399 if headers.end_stream {
5400 // RFC 9113 §8.1.1: when END_STREAM arrives via trailers,
5401 // validate that total DATA received matches Content-Length.
5402 // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
5403 if !was_initial && !self.content_length_exempt(&stream.context) {
5404 let parts = stream.split(&self.position);
5405 if let kawa::BodySize::Length(expected) = parts.rbuffer.body_size {
5406 if *parts.data_received != expected {
5407 error!(
5408 "{} Content-Length mismatch on trailers: received {} != declared {}",
5409 log_context!(self),
5410 *parts.data_received,
5411 expected
5412 );
5413 let result = self.reset_stream(
5414 stream_id,
5415 global_stream_id,
5416 context,
5417 endpoint,
5418 H2Error::ProtocolError,
5419 );
5420 self.remove_dead_stream(stream_id, global_stream_id);
5421 return result;
5422 }
5423 }
5424 }
5425 self.mark_end_of_stream(stream);
5426 }
5427 if let StreamState::Linked(token) = stream.state {
5428 // Mirror of handle_data_frame's rearm. LIFECYCLE invariant 15.
5429 endpoint.readiness_mut(token).arm_writable();
5430 incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_HEADERS);
5431 }
5432 // was_initial prevents trailers from triggering connection
5433 if was_initial && self.position.is_server() {
5434 incr!(names::http::REQUESTS);
5435 gauge_add!(names::http::ACTIVE_REQUESTS, 1);
5436 stream.metrics.service_start();
5437 stream.request_counted = true;
5438 stream.state = StreamState::Link;
5439 context.pending_links.push_back(global_stream_id);
5440 }
5441 MuxResult::Continue
5442 }
5443
5444 fn handle_push_promise_frame(&mut self) -> MuxResult {
5445 self.attribute_bytes_to_overhead();
5446 match self.position {
5447 Position::Client(..) => {
5448 // RFC 9113 §8.4: Server push is deprecated. Sozu never sends
5449 // SETTINGS_ENABLE_PUSH=1, so receiving PUSH_PROMISE is a protocol error.
5450 error!(
5451 "{} Received PUSH_PROMISE but server push is not supported",
5452 log_context!(self)
5453 );
5454 self.goaway(H2Error::ProtocolError)
5455 }
5456 Position::Server => {
5457 // Clients must never send PUSH_PROMISE (RFC 9113 §8.4)
5458 error!("{} Received PUSH_PROMISE from client", log_context!(self));
5459 self.goaway(H2Error::ProtocolError)
5460 }
5461 }
5462 }
5463
5464 fn handle_priority_frame<E, L>(
5465 &mut self,
5466 priority: parser::Priority,
5467 context: &mut Context<L>,
5468 endpoint: E,
5469 ) -> MuxResult
5470 where
5471 E: Endpoint,
5472 L: ListenerHandler + L7ListenerHandler,
5473 {
5474 if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
5475 let stream = &mut context.streams[global_stream_id];
5476 self.attribute_bytes_to_stream(&mut stream.metrics);
5477 } else {
5478 self.attribute_bytes_to_overhead();
5479 }
5480 // Pass 3 Medium #4: standalone PRIORITY frames can arrive for any
5481 // peer-chosen stream ID. Accept only currently-open streams and a
5482 // small idle look-ahead window; everything else is dropped before
5483 // it can feed memory into the priority map.
5484 if self.prioriser.push_priority_guarded(
5485 priority.stream_id,
5486 priority.inner,
5487 self.last_stream_id,
5488 &self.streams,
5489 ) {
5490 if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
5491 let result = self.reset_stream(
5492 priority.stream_id,
5493 global_stream_id,
5494 context,
5495 endpoint,
5496 H2Error::ProtocolError,
5497 );
5498 self.remove_dead_stream(priority.stream_id, global_stream_id);
5499 return result;
5500 } else {
5501 error!(
5502 "{} INVALID PRIORITY RECEIVED ON INVALID STREAM",
5503 log_context!(self)
5504 );
5505 return self.goaway(H2Error::ProtocolError);
5506 }
5507 }
5508 MuxResult::Continue
5509 }
5510
5511 /// RFC 9218 §7.1: PRIORITY_UPDATE reprioritizes an open or idle-soon
5512 /// stream at the connection level. Decodes the priority field value
5513 /// (same grammar as the `priority` request header, `parse_rfc9218_priority`)
5514 /// and pushes it into the `Prioriser` through the same guarded path used
5515 /// for standalone PRIORITY frames — the guard bounds memory against a
5516 /// client spamming PRIORITY_UPDATE for far-future stream IDs.
5517 ///
5518 /// Prioritized stream ID `0` is a connection-level `PROTOCOL_ERROR`
5519 /// (RFC 9218 §7.1). For any other ID that is not currently open or
5520 /// within the idle look-ahead budget, the update is silently dropped
5521 /// (matches the PRIORITY-frame guard semantics — no state change).
5522 fn handle_priority_update_frame(&mut self, pu: parser::PriorityUpdate) -> MuxResult {
5523 self.attribute_bytes_to_overhead();
5524 if pu.prioritized_stream_id == 0 {
5525 error!(
5526 "{} PRIORITY_UPDATE with prioritized_stream_id=0 (RFC 9218 §7.1)",
5527 log_context!(self)
5528 );
5529 return self.goaway(H2Error::ProtocolError);
5530 }
5531 let (urgency, incremental) = pkawa::parse_rfc9218_priority(&pu.priority_field_value);
5532 let (prev_urgency, _) = self.prioriser.get(&pu.prioritized_stream_id);
5533 trace!(
5534 "{} PRIORITY_UPDATE stream={} urgency={}->{} incremental={} rearmed_writable=true",
5535 log_context!(self),
5536 pu.prioritized_stream_id,
5537 prev_urgency,
5538 urgency,
5539 incremental
5540 );
5541 let _ = self.prioriser.push_priority_guarded(
5542 pu.prioritized_stream_id,
5543 parser::PriorityPart::Rfc9218 {
5544 urgency,
5545 incremental,
5546 },
5547 self.last_stream_id,
5548 &self.streams,
5549 );
5550 // LIFECYCLE invariant 15: reprioritisation only changes ordering for
5551 // the NEXT write pass. Under ET epoll, if finalize_write already
5552 // stripped WRITABLE, the scheduler won't re-run without a synthetic
5553 // wake — pair the interest insert with signal_pending_write.
5554 self.readiness.arm_writable();
5555 incr!(names::h2::SIGNAL_WRITABLE_REARMED_PRIORITY_UPDATE);
5556 MuxResult::Continue
5557 }
5558
5559 fn handle_rst_stream_frame<E, L>(
5560 &mut self,
5561 rst_stream: parser::RstStream,
5562 context: &mut Context<L>,
5563 mut endpoint: E,
5564 ) -> MuxResult
5565 where
5566 E: Endpoint,
5567 L: ListenerHandler + L7ListenerHandler,
5568 {
5569 // Per-error-code counter for the inbound RST. Emitted before the
5570 // flood-detector trip check so even a connection that gets terminated
5571 // by `handle_flood_violation` shows up in the per-code breakdown
5572 // (the dedicated `h2.flood.violation.rst_stream_*` series tracks the
5573 // mitigation event itself).
5574 count!(metric_for_rst_stream_received(rst_stream.error_code), 1);
5575 // CVE-2023-44487 Rapid Reset + CVE-2019-9514: track RST_STREAM rate.
5576 let rst_count_before = self.flood_detector.rst_stream_count;
5577 self.flood_detector.rst_stream_count += 1;
5578 debug_assert_eq!(
5579 self.flood_detector.rst_stream_count,
5580 rst_count_before + 1,
5581 "per-window RST_STREAM counter must advance by exactly one per inbound RST"
5582 );
5583 check_flood_or_return!(self);
5584 // Additional CVE-2023-44487 mitigation: lifetime cap on RST_STREAM
5585 // frames received. The per-window counter above half-decays, so a
5586 // patient client can keep ~50 RST/s forever; a never-decaying
5587 // lifetime counter puts an absolute ceiling on that amplification.
5588 // Streams whose backend response has not yet started count toward a
5589 // much lower "abusive" ceiling — this is the signature Rapid Reset
5590 // pattern where the attacker pays one RST frame and we pay a
5591 // backend round-trip for each.
5592 //
5593 // "Response started" here means the Server has begun producing
5594 // response bytes (backend kawa buffer past its initial phase). For
5595 // the Client position the concept does not apply symmetrically
5596 // (RSTs received from the backend are rare and benign), so we
5597 // conservatively flag them as abusive too — lifetime cap still
5598 // dominates in practice.
5599 let response_started = match self.streams.get(&rst_stream.stream_id) {
5600 Some(global_stream_id) => {
5601 let stream = &context.streams[*global_stream_id];
5602 !stream.back.is_initial()
5603 }
5604 // Stream already gone (e.g. closed, not yet registered) —
5605 // treat as response-started to avoid over-counting benign
5606 // races as abusive.
5607 None => true,
5608 };
5609 if let Some(violation) = self.flood_detector.record_rst_lifetime(response_started) {
5610 return self.handle_flood_violation(violation);
5611 }
5612 // Rapid Reset signature (CVE-2023-44487): a RST that arrives before the
5613 // backend has begun answering. Emitted alongside the per-code counter
5614 // so the SOC can alert on the rate of pre-response RSTs without
5615 // having to differentiate by error code.
5616 if !response_started {
5617 count!(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START, 1);
5618 }
5619 debug!(
5620 "{} RstStream({} -> {})",
5621 log_context!(self),
5622 rst_stream.error_code,
5623 H2Error::try_from(rst_stream.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str())
5624 );
5625 // Compute totals before removing the stream from the map,
5626 // so the removed stream's bytes are included in the total.
5627 let rst_byte_totals = self.compute_stream_byte_totals(context);
5628 if let Some(global_stream_id) = self.streams.get(&rst_stream.stream_id).copied() {
5629 let stream = &mut context.streams[global_stream_id];
5630 self.attribute_bytes_to_stream(&mut stream.metrics);
5631 let linked_token = stream.linked_token();
5632 let (client_rtt, server_rtt) =
5633 Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
5634 if let Some(token) = linked_token {
5635 endpoint.end_stream(token, global_stream_id, context);
5636 }
5637 let stream = &mut context.streams[global_stream_id];
5638 match &self.position {
5639 // Inbound RST_STREAM on the backend side terminates the in-flight
5640 // request without going through Connection::end_stream (the normal
5641 // place where Backend.active_requests is decremented), so do the
5642 // bookkeeping explicitly here to avoid leaking load counters.
5643 Position::Client(_, backend, BackendStatus::Connected) => {
5644 let mut backend_borrow = backend.borrow_mut();
5645 backend_borrow.active_requests =
5646 backend_borrow.active_requests.saturating_sub(1);
5647 }
5648 Position::Client(..) => {}
5649 Position::Server => {
5650 self.distribute_overhead(&mut stream.metrics, rst_byte_totals);
5651 // This is a special case, normally, all stream are terminated by the server
5652 // when the last byte of the response is written. Here, the reset is requested
5653 // on the server endpoint and immediately terminates, shortcutting the other path
5654 stream.metrics.backend_stop();
5655 stream.generate_access_log(
5656 true,
5657 Some("H2::ResetFrame"),
5658 context.listener.clone(),
5659 client_rtt,
5660 server_rtt,
5661 );
5662 stream.state = StreamState::Recycle;
5663 }
5664 }
5665 // Retire from streams/prioriser/stream_last_activity_at and
5666 // invalidate expect_write/expect_read if they reference this gid.
5667 self.remove_dead_stream(rst_stream.stream_id, global_stream_id);
5668 } else {
5669 self.attribute_bytes_to_overhead();
5670 }
5671 MuxResult::Continue
5672 }
5673
5674 fn handle_settings_frame<L>(
5675 &mut self,
5676 settings: parser::Settings,
5677 context: &mut Context<L>,
5678 ) -> MuxResult
5679 where
5680 L: ListenerHandler + L7ListenerHandler,
5681 {
5682 if settings.ack {
5683 // RFC 9113 §6.5: SETTINGS ACK must have empty payload
5684 if !settings.settings.is_empty() {
5685 error!("{} SETTINGS ACK with non-empty payload", log_context!(self));
5686 return self.goaway(H2Error::FrameSizeError);
5687 }
5688 // RFC 9113 §6.5: peer acknowledged our SETTINGS — clear timeout
5689 self.settings_sent_at = None;
5690 // RFC 7541 §4.2: sync the decoder's max allowed table size with
5691 // what we advertised. Currently a no-op (settings don't change at
5692 // runtime), but guards against future runtime SETTINGS updates.
5693 self.decoder.set_max_allowed_table_size(
5694 self.local_settings.settings_header_table_size as usize,
5695 );
5696 self.attribute_bytes_to_overhead();
5697 return MuxResult::Continue;
5698 }
5699 // CVE-2019-9515: track SETTINGS frame rate
5700 let settings_count_before = self.flood_detector.settings_count;
5701 let settings_lifetime_before = self.flood_detector.total_settings_received_lifetime;
5702 self.flood_detector.settings_count += 1;
5703 self.flood_detector.total_settings_received_lifetime = self
5704 .flood_detector
5705 .total_settings_received_lifetime
5706 .saturating_add(1);
5707 debug_assert_eq!(
5708 self.flood_detector.settings_count,
5709 settings_count_before + 1,
5710 "per-window SETTINGS counter must advance by one per non-ACK SETTINGS"
5711 );
5712 debug_assert!(
5713 self.flood_detector.total_settings_received_lifetime > settings_lifetime_before
5714 || settings_lifetime_before == u32::MAX,
5715 "lifetime SETTINGS counter must advance (or already be saturated)"
5716 );
5717 check_flood_or_return!(self);
5718 for setting in settings.settings {
5719 let v = setting.value;
5720 let mut is_error = false;
5721 #[rustfmt::skip]
5722 match setting.identifier {
5723 parser::SETTINGS_HEADER_TABLE_SIZE => {
5724 // Cap to the configured maximum — a malicious peer can
5725 // advertise up to 4 GB to inflate HPACK encoder memory.
5726 let cap = self.flood_detector.config.max_header_table_size;
5727 let capped = v.min(cap);
5728 self.peer_settings.settings_header_table_size = capped;
5729 self.encoder.set_max_table_size(capped as usize);
5730 // RFC 7541 §4.2 / §6.3: queue a dynamic-table-size-update
5731 // HPACK directive for the next header block we emit.
5732 // Without it, the peer's decoder keeps its previous (possibly
5733 // larger) table cap and our encoder-side change is silent
5734 // — conformance suites (h2spec `hpack/4.2`) will flag it.
5735 self.pending_table_size_update = Some(capped);
5736 },
5737 parser::SETTINGS_ENABLE_PUSH => { self.peer_settings.settings_enable_push = v == 1; is_error |= v > 1 },
5738 parser::SETTINGS_MAX_CONCURRENT_STREAMS => { self.peer_settings.settings_max_concurrent_streams = v },
5739 parser::SETTINGS_INITIAL_WINDOW_SIZE => { is_error |= self.update_initial_window_size(v, context) },
5740 parser::SETTINGS_MAX_FRAME_SIZE => { self.peer_settings.settings_max_frame_size = v; is_error |= !(MIN_MAX_FRAME_SIZE..MAX_MAX_FRAME_SIZE).contains(&v) },
5741 parser::SETTINGS_MAX_HEADER_LIST_SIZE => { self.peer_settings.settings_max_header_list_size = v },
5742 parser::SETTINGS_ENABLE_CONNECT_PROTOCOL => { self.peer_settings.settings_enable_connect_protocol = v == 1; is_error |= v > 1 },
5743 parser::SETTINGS_NO_RFC7540_PRIORITIES => { self.peer_settings.settings_no_rfc7540_priorities = v == 1; is_error |= v > 1 },
5744 other => { warn!("Unknown setting_id: {}, we MUST ignore this", other); self.flood_detector.glitch_count += 1 },
5745 };
5746 if is_error {
5747 error!("{} INVALID SETTING", log_context!(self));
5748 return self.goaway(H2Error::ProtocolError);
5749 }
5750 }
5751
5752 self.attribute_bytes_to_overhead();
5753
5754 // Enlarge the connection-level receive window for backend H2
5755 // connections (Position::Client). The server side does this in
5756 // the ServerSettings writable path, but the client needs to do
5757 // it here after receiving the server's initial SETTINGS.
5758 if self.position.is_client()
5759 && self.flow_control.window <= DEFAULT_INITIAL_WINDOW_SIZE as i32
5760 {
5761 let increment = self
5762 .connection_config
5763 .initial_connection_window
5764 .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
5765 if increment > 0 {
5766 self.queue_window_update(0, increment);
5767 }
5768 // Do NOT increment flow_control.window here: sending our own
5769 // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
5770 // Our send window is only updated by WINDOW_UPDATEs we receive
5771 // from the peer (RFC 9113 §6.9).
5772 }
5773
5774 let kawa = &mut self.zero;
5775 let ack = &serializer::SETTINGS_ACKNOWLEDGEMENT;
5776 let buf = kawa.storage.space();
5777 if buf.len() < ack.len() {
5778 error!(
5779 "{} No space in zero buffer for SETTINGS ACK ({} available, {} needed)",
5780 log_context!(self),
5781 buf.len(),
5782 ack.len()
5783 );
5784 return self.force_disconnect();
5785 }
5786 buf[..ack.len()].copy_from_slice(ack);
5787 kawa.storage.fill(ack.len());
5788
5789 self.readiness.interest.insert(Ready::WRITABLE);
5790 self.readiness.interest.remove(Ready::READABLE);
5791 self.expect_write = Some(H2StreamId::Zero);
5792 self.readiness.signal_pending_write();
5793 MuxResult::Continue
5794 }
5795
5796 fn handle_ping_frame(&mut self, ping: parser::Ping) -> MuxResult {
5797 if ping.ack {
5798 self.attribute_bytes_to_overhead();
5799 return MuxResult::Continue;
5800 }
5801 // CVE-2019-9512: track non-ACK PING frame rate
5802 let ping_count_before = self.flood_detector.ping_count;
5803 let ping_lifetime_before = self.flood_detector.total_ping_received_lifetime;
5804 self.flood_detector.ping_count += 1;
5805 self.flood_detector.total_ping_received_lifetime = self
5806 .flood_detector
5807 .total_ping_received_lifetime
5808 .saturating_add(1);
5809 debug_assert_eq!(
5810 self.flood_detector.ping_count,
5811 ping_count_before + 1,
5812 "per-window PING counter must advance by one per non-ACK PING"
5813 );
5814 debug_assert!(
5815 self.flood_detector.total_ping_received_lifetime > ping_lifetime_before
5816 || ping_lifetime_before == u32::MAX,
5817 "lifetime PING counter must advance (or already be saturated)"
5818 );
5819 check_flood_or_return!(self);
5820 self.attribute_bytes_to_overhead();
5821 let kawa = &mut self.zero;
5822 let ping_response_size = serializer::PING_ACKNOWLEDGEMENT_HEADER.len() + 8;
5823 if kawa.storage.space().len() < ping_response_size {
5824 error!(
5825 "{} No space in zero buffer for PING response ({} available, {} needed)",
5826 log_context!(self),
5827 kawa.storage.space().len(),
5828 ping_response_size
5829 );
5830 return self.force_disconnect();
5831 }
5832 match serializer::gen_ping_acknowledgement(kawa.storage.space(), &ping.payload) {
5833 Ok((_, size)) => {
5834 kawa.storage.fill(size);
5835 incr!(names::h2::FRAMES_TX_PING_ACK);
5836 }
5837 Err(error) => {
5838 error!(
5839 "{} Could not serialize PingFrame: {:?}",
5840 log_context!(self),
5841 error
5842 );
5843 return self.force_disconnect();
5844 }
5845 };
5846 self.readiness.interest.insert(Ready::WRITABLE);
5847 self.readiness.interest.remove(Ready::READABLE);
5848 self.expect_write = Some(H2StreamId::Zero);
5849 self.readiness.signal_pending_write();
5850 MuxResult::Continue
5851 }
5852
5853 fn handle_goaway_frame<E, L>(
5854 &mut self,
5855 goaway: parser::GoAway,
5856 context: &mut Context<L>,
5857 mut endpoint: E,
5858 ) -> MuxResult
5859 where
5860 E: Endpoint,
5861 L: ListenerHandler + L7ListenerHandler,
5862 {
5863 self.attribute_bytes_to_overhead();
5864 let error_name =
5865 H2Error::try_from(goaway.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str());
5866 if goaway.error_code == H2Error::NoError as u32 {
5867 debug!(
5868 "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5869 log_context!(self),
5870 goaway.last_stream_id,
5871 error_name,
5872 goaway.additional_debug_data
5873 );
5874 } else {
5875 // Peer-originated failure: no variant of H2Error from a peer
5876 // implies a sozu bug. Impact handling is separate (retry above
5877 // `last_stream_id`, RST_STREAM for consumed streams) and logs
5878 // its own details below, so the summary drops to `warn!`.
5879 warn!(
5880 "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5881 log_context!(self),
5882 goaway.last_stream_id,
5883 error_name,
5884 goaway.additional_debug_data
5885 );
5886 }
5887 count!(metric_for_goaway_received(goaway.error_code), 1);
5888 // RFC 9113 §6.8: begin graceful drain.
5889 self.drain.draining = true;
5890 self.drain.peer_last_stream_id = Some(goaway.last_stream_id);
5891
5892 // Streams with ID > last_stream_id were NOT processed by the peer.
5893 // Mark them for retry (StreamState::Link) so they can be retried
5894 // on a new connection.
5895 // IMPORTANT: do NOT call endpoint.end_stream() here — that would
5896 // remove the stream from the frontend's H2 stream map and send
5897 // RST_STREAM to the client, killing the request instead of retrying it.
5898 let mut retry_streams = Vec::new();
5899 for (&stream_id, &global_stream_id) in &self.streams {
5900 if stream_id > goaway.last_stream_id {
5901 retry_streams.push((stream_id, global_stream_id));
5902 }
5903 }
5904 for (stream_id, global_stream_id) in &retry_streams {
5905 // Remove from reverse index before transitioning away from Linked.
5906 if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5907 remove_backend_stream(&mut context.backend_streams, token, *global_stream_id);
5908 }
5909 let stream = &mut context.streams[*global_stream_id];
5910 if stream.front.consumed {
5911 // Request was already sent to this backend — we can't
5912 // replay it. Use the linked token's readiness (via endpoint)
5913 // so the RST_STREAM reaches the client.
5914 debug!(
5915 "{} GOAWAY: stream {} already consumed, cannot retry",
5916 log_context!(self),
5917 stream_id
5918 );
5919 if let StreamState::Linked(token) = stream.state {
5920 let front_readiness = endpoint.readiness_mut(token);
5921 forcefully_terminate_answer(stream, front_readiness, H2Error::RefusedStream);
5922 } else {
5923 warn!(
5924 "{} GOAWAY: stream {} consumed but not Linked, cannot notify frontend",
5925 log_context!(self),
5926 stream_id
5927 );
5928 }
5929 } else {
5930 stream.state = StreamState::Link;
5931 context.pending_links.push_back(*global_stream_id);
5932 }
5933 // Both retry (!consumed) and terminated (consumed) paths remove the
5934 // stream from self.streams without going through Connection::end_stream,
5935 // so decrement Backend.active_requests here to keep load metrics honest.
5936 if let Position::Client(_, backend, BackendStatus::Connected) = &self.position {
5937 let mut backend_borrow = backend.borrow_mut();
5938 backend_borrow.active_requests = backend_borrow.active_requests.saturating_sub(1);
5939 }
5940 // Retire from streams/prioriser/stream_last_activity_at and
5941 // invalidate expect_write/expect_read if they reference this gid.
5942 self.remove_dead_stream(*stream_id, *global_stream_id);
5943 }
5944
5945 // If no active streams remain, close immediately
5946 if self.streams.is_empty() {
5947 return self.goaway(H2Error::NoError);
5948 }
5949
5950 // Otherwise, let remaining streams (ID <= last_stream_id) complete.
5951 // The connection will be closed when all streams finish.
5952 MuxResult::Continue
5953 }
5954
5955 fn handle_window_update_frame<E, L>(
5956 &mut self,
5957 wu: WindowUpdate,
5958 context: &mut Context<L>,
5959 endpoint: E,
5960 ) -> MuxResult
5961 where
5962 E: Endpoint,
5963 L: ListenerHandler + L7ListenerHandler,
5964 {
5965 let stream_id = wu.stream_id;
5966 let increment = wu.increment;
5967
5968 // RFC 9113 §6.9: increment of 0 MUST be treated as an error.
5969 // Connection-level (stream 0) -> connection error (GOAWAY).
5970 // Stream-level -> stream error (RST_STREAM).
5971 if increment == 0 {
5972 if stream_id == 0 {
5973 error!(
5974 "{} WINDOW_UPDATE with zero increment on connection (stream 0)",
5975 log_context!(self)
5976 );
5977 return self.goaway(H2Error::ProtocolError);
5978 } else {
5979 error!(
5980 "{} WINDOW_UPDATE with zero increment on stream {}",
5981 log_context!(self),
5982 stream_id
5983 );
5984 if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5985 let result = self.reset_stream(
5986 stream_id,
5987 global_stream_id,
5988 context,
5989 endpoint,
5990 H2Error::ProtocolError,
5991 );
5992 self.remove_dead_stream(stream_id, global_stream_id);
5993 return result;
5994 }
5995 // Stream not in map (already closed) — treat as glitch
5996 self.flood_detector.glitch_count += 1;
5997 check_flood_or_return!(self);
5998 self.attribute_bytes_to_overhead();
5999 return MuxResult::Continue;
6000 }
6001 }
6002
6003 // The parser masks the reserved bit (STREAM_ID_MASK), so increment <=
6004 // 2^31-1 and try_from always succeeds. Use try_from rather than `as` to
6005 // guard against a future parser change that drops the mask.
6006 let increment = i32::try_from(increment).unwrap_or(i32::MAX);
6007 // RFC 9113 §6.9: a non-zero WINDOW_UPDATE increment is in [1, 2^31-1].
6008 // Zero was short-circuited above; this asserts the masked value is a
6009 // legal positive increment before we add it to a window.
6010 debug_assert!(
6011 increment > 0,
6012 "WINDOW_UPDATE increment must be strictly positive at this point (zero handled above)"
6013 );
6014 if stream_id == 0 {
6015 // Count connection-level WINDOW_UPDATEs before touching the window
6016 // so a per-window flood stops us before we pay the arithmetic cost
6017 // on a million-frame burst. Zero-increment frames short-circuited
6018 // above, so every increment here is a legal-looking rate consumer.
6019 let wu0_before = self.flood_detector.window_update_stream0_count;
6020 self.flood_detector.window_update_stream0_count = self
6021 .flood_detector
6022 .window_update_stream0_count
6023 .saturating_add(1);
6024 debug_assert!(
6025 self.flood_detector.window_update_stream0_count > wu0_before
6026 || wu0_before == u32::MAX,
6027 "stream-0 WINDOW_UPDATE flood counter must advance before the flood check"
6028 );
6029 check_flood_or_return!(self);
6030 self.attribute_bytes_to_overhead();
6031 let window_before = self.flow_control.window;
6032 if let Some(window) = self.flow_control.window.checked_add(increment) {
6033 if self.flow_control.window <= 0 && window > 0 {
6034 self.readiness.arm_writable();
6035 }
6036 self.flow_control.window = window;
6037 // Flow-control replenish invariant (RFC 9113 §6.9): the
6038 // connection send window grows by exactly `increment` and stays
6039 // within i32 (the `checked_add` already rejected overflow, which
6040 // is a FLOW_CONTROL_ERROR on the wire). The window may legally
6041 // be negative (a SETTINGS change can shrink it below zero) but
6042 // a WINDOW_UPDATE only ever increases it.
6043 debug_assert_eq!(
6044 self.flow_control.window,
6045 window_before + increment,
6046 "connection window must increase by exactly the increment"
6047 );
6048 debug_assert!(
6049 self.flow_control.window > window_before,
6050 "a positive WINDOW_UPDATE must strictly grow the connection window"
6051 );
6052 debug!(
6053 "{} WINDOW_UPDATE received: stream=0 increment={} new_connection_window={}",
6054 log_context!(self),
6055 increment,
6056 self.flow_control.window
6057 );
6058 } else {
6059 error!("{} INVALID WINDOW INCREMENT", log_context!(self));
6060 return self.goaway(H2Error::FlowControlError);
6061 }
6062 } else if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
6063 let stream = &mut context.streams[global_stream_id];
6064 self.attribute_bytes_to_stream(&mut stream.metrics);
6065 let stream_window_before = stream.window;
6066 if let Some(window) = stream.window.checked_add(increment) {
6067 if stream.window <= 0 && window > 0 {
6068 self.readiness.arm_writable();
6069 }
6070 stream.window = window;
6071 // Same replenish invariant as the connection window, applied to
6072 // the per-stream send window (RFC 9113 §6.9.1). Overflow past
6073 // 2^31-1 is rejected by `checked_add` and handled as a
6074 // FLOW_CONTROL_ERROR RST_STREAM below.
6075 debug_assert_eq!(
6076 stream.window,
6077 stream_window_before + increment,
6078 "stream window must increase by exactly the increment"
6079 );
6080 debug_assert!(
6081 stream.window > stream_window_before,
6082 "a positive WINDOW_UPDATE must strictly grow the stream window"
6083 );
6084 debug!(
6085 "{} WINDOW_UPDATE received: stream={} increment={} new_stream_window={}",
6086 log_context!(self),
6087 stream_id,
6088 increment,
6089 stream.window
6090 );
6091 } else {
6092 let result = self.reset_stream(
6093 stream_id,
6094 global_stream_id,
6095 context,
6096 endpoint,
6097 H2Error::FlowControlError,
6098 );
6099 self.remove_dead_stream(stream_id, global_stream_id);
6100 return result;
6101 }
6102 } else {
6103 self.attribute_bytes_to_overhead();
6104 trace!(
6105 "{} Ignoring window update on closed stream {}: {}",
6106 log_context!(self),
6107 stream_id,
6108 increment
6109 );
6110 // Pass 3 Low #5: WINDOW_UPDATE on a closed stream is legal
6111 // (RFC 9113 §6.9.1) but has no useful effect, so a peer that
6112 // keeps sending them is wasting our cycles. Count it as a
6113 // glitch so a flood contributes to `check_flood()` and can
6114 // eventually trigger ENHANCE_YOUR_CALM.
6115 self.flood_detector.glitch_count += 1;
6116 check_flood_or_return!(self);
6117 }
6118 MuxResult::Continue
6119 }
6120
6121 fn update_initial_window_size<L>(&mut self, value: u32, context: &mut Context<L>) -> bool
6122 where
6123 L: ListenerHandler + L7ListenerHandler,
6124 {
6125 if value > FLOW_CONTROL_MAX_WINDOW {
6126 return true;
6127 }
6128 let delta = match i32::try_from(
6129 value as i64 - self.peer_settings.settings_initial_window_size as i64,
6130 ) {
6131 Ok(d) => d,
6132 Err(_) => {
6133 error!("{} initial window size delta overflow", log_context!(self));
6134 return true;
6135 }
6136 };
6137 let mut open_window = false;
6138 // Only update windows for streams owned by this connection
6139 for &global_stream_id in self.streams.values() {
6140 let stream = &mut context.streams[global_stream_id];
6141 // RFC 9113 §6.9.2: changes to SETTINGS_INITIAL_WINDOW_SIZE can cause
6142 // stream windows to exceed 2^31-1, which is a flow control error.
6143 match stream.window.checked_add(delta) {
6144 Some(new_window) => {
6145 open_window |= stream.window <= 0 && new_window > 0;
6146 stream.window = new_window;
6147 }
6148 None => return true,
6149 }
6150 }
6151 trace!(
6152 "{} UPDATE INIT WINDOW: {} {} {:?}",
6153 log_context!(self),
6154 delta,
6155 open_window,
6156 self.readiness
6157 );
6158 if open_window {
6159 self.readiness.arm_writable();
6160 }
6161 self.peer_settings.settings_initial_window_size = value;
6162 false
6163 }
6164
6165 pub fn force_disconnect(&mut self) -> MuxResult {
6166 self.state = H2State::Error;
6167 match &mut self.position {
6168 Position::Client(_, _, status) => {
6169 *status = BackendStatus::Disconnecting;
6170 self.readiness.event = Ready::HUP;
6171 debug!(
6172 "{} H2 force_disconnect client: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
6173 log_context!(self),
6174 self.state,
6175 self.streams.len(),
6176 self.expect_write,
6177 self.socket.socket_wants_write(),
6178 self.readiness
6179 );
6180 MuxResult::Continue
6181 }
6182 Position::Server => {
6183 if self.peer_gone_after_final_goaway() {
6184 return MuxResult::CloseSession;
6185 }
6186 // Don't disconnect immediately if rustls still has buffered TLS
6187 // records. Returning CloseSession here triggers shutdown(Write)
6188 // which sends FIN — but any TLS records still in rustls's buffer
6189 // (not yet flushed to the TCP send buffer) are lost, causing the
6190 // client to see "TLS decode error / unexpected eof".
6191 // Instead, keep WRITABLE interest and let the writable path flush.
6192 if self.socket.socket_wants_write() {
6193 debug!(
6194 "{} H2 force_disconnect delaying close: state={:?}, streams={}, expect_write={:?}, wants_write=true, readiness={:?}",
6195 log_context!(self),
6196 self.state,
6197 self.streams.len(),
6198 self.expect_write,
6199 self.readiness
6200 );
6201 self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
6202 self.ensure_tls_flushed();
6203 MuxResult::Continue
6204 } else {
6205 debug!(
6206 "{} H2 force_disconnect closing session: state={:?}, streams={}, expect_write={:?}, wants_write=false, readiness={:?}",
6207 log_context!(self),
6208 self.state,
6209 self.streams.len(),
6210 self.expect_write,
6211 self.readiness
6212 );
6213 MuxResult::CloseSession
6214 }
6215 }
6216 }
6217 }
6218
6219 pub fn close<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E)
6220 where
6221 E: Endpoint,
6222 L: ListenerHandler + L7ListenerHandler,
6223 {
6224 match self.position {
6225 Position::Client(_, _, BackendStatus::KeepAlive) => {
6226 error!(
6227 "{} H2 connections do not use KeepAlive backend status",
6228 log_context!(self)
6229 );
6230 return;
6231 }
6232 Position::Client(..) => {}
6233 Position::Server => {
6234 let tls_pending_before = self.socket.socket_wants_write();
6235 if !self.streams.is_empty() || tls_pending_before || self.expect_write.is_some() {
6236 debug!(
6237 "{} H2 close with active state: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
6238 log_context!(self),
6239 self.state,
6240 self.streams.len(),
6241 self.expect_write,
6242 tls_pending_before,
6243 self.readiness
6244 );
6245 for (stream_id, global_stream_id) in &self.streams {
6246 let stream = &context.streams[*global_stream_id];
6247 debug!(
6248 "{} close stream id={} gid={}: state={:?}, front_eos={}, back_eos={}, front_phase={:?}, back_phase={:?}, front_completed={}, back_completed={}",
6249 log_context!(self),
6250 stream_id,
6251 global_stream_id,
6252 stream.state,
6253 stream.front_received_end_of_stream,
6254 stream.back_received_end_of_stream,
6255 stream.front.parsing_phase,
6256 stream.back.parsing_phase,
6257 stream.front.is_completed(),
6258 stream.back.is_completed()
6259 );
6260 }
6261 }
6262 if !self.close_notify_sent {
6263 trace!("{} H2 SENDING CLOSE NOTIFY", log_context!(self));
6264 }
6265 let (tls_pending_after, drain_rounds) =
6266 drain_tls_close_notify(&mut self.socket, &mut self.close_notify_sent);
6267 if tls_pending_after {
6268 // Severity tiering: key on stream-count + close-state, not
6269 // peer-vs-operator. Composes with the send-side `H2Error`
6270 // variant tier in `goaway()` — both rules demote benign
6271 // paths and keep loss-bearing paths loud.
6272 //
6273 // - `streams != 0` -> `error!`: live streams at
6274 // close time, response-byte loss is possible.
6275 // - `streams == 0` AND state in {GoAway, Error}
6276 // -> `warn!`: idle close after
6277 // a GOAWAY exchange (peer-initiated abort or our own
6278 // graceful drain). What's stranded is best-effort
6279 // GOAWAY/close_notify; no application data was queued.
6280 // - `streams == 0` from any other state
6281 // -> `error!`: unexpected
6282 // teardown path (no GOAWAY exchange) — keep loud so
6283 // unknown failure modes surface.
6284 if !self.streams.is_empty() {
6285 error!(
6286 "{} TLS buffer NOT fully drained on close: \
6287 pending_before={}, pending_after={}, drain_rounds={}, \
6288 state={:?}, streams={}, expect_write={:?}, \
6289 close_notify_sent={}, readiness={:?}",
6290 log_context!(self),
6291 tls_pending_before,
6292 tls_pending_after,
6293 drain_rounds,
6294 self.state,
6295 self.streams.len(),
6296 self.expect_write,
6297 self.close_notify_sent,
6298 self.readiness
6299 );
6300 } else if matches!(self.state, H2State::GoAway | H2State::Error) {
6301 warn!(
6302 "{} TLS buffer NOT fully drained on close: \
6303 pending_before={}, pending_after={}, drain_rounds={}, \
6304 state={:?}, streams={}, expect_write={:?}, \
6305 close_notify_sent={}, readiness={:?}",
6306 log_context!(self),
6307 tls_pending_before,
6308 tls_pending_after,
6309 drain_rounds,
6310 self.state,
6311 self.streams.len(),
6312 self.expect_write,
6313 self.close_notify_sent,
6314 self.readiness
6315 );
6316 } else {
6317 error!(
6318 "{} TLS buffer NOT fully drained on close: \
6319 pending_before={}, pending_after={}, drain_rounds={}, \
6320 state={:?}, streams={}, expect_write={:?}, \
6321 close_notify_sent={}, readiness={:?}",
6322 log_context!(self),
6323 tls_pending_before,
6324 tls_pending_after,
6325 drain_rounds,
6326 self.state,
6327 self.streams.len(),
6328 self.expect_write,
6329 self.close_notify_sent,
6330 self.readiness
6331 );
6332 }
6333 }
6334 return;
6335 }
6336 }
6337 // reconnection is handled by the server for each stream separately
6338 for global_stream_id in self.streams.values() {
6339 trace!("{} end stream: {}", log_context!(self), global_stream_id);
6340 if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
6341 endpoint.end_stream(token, *global_stream_id, context);
6342 }
6343 }
6344 }
6345
6346 /// Reset a stream: tear down kawa state, emit `RST_STREAM` on the wire,
6347 /// and record MadeYouReset accounting.
6348 ///
6349 /// `wire_stream_id` is the on-wire `StreamId`; `stream_id` is the internal
6350 /// `GlobalStreamId` slot. Callers already carry both so we pass them
6351 /// explicitly rather than scanning `self.streams`. The wire id is threaded
6352 /// into [`Self::enqueue_rst`] which queues the frame for serialisation in
6353 /// [`Self::flush_pending_control_frames`] on the next writable tick —
6354 /// independent of whether the caller immediately evicts the slot via
6355 /// `remove_dead_stream` (which they usually do). This is what guarantees
6356 /// the RST reaches the peer for malformed HEADERS / flow-control /
6357 /// content-length violations flagged by h2spec 2.0.
6358 pub fn reset_stream<E, L>(
6359 &mut self,
6360 wire_stream_id: StreamId,
6361 stream_id: GlobalStreamId,
6362 context: &mut Context<L>,
6363 mut endpoint: E,
6364 error: H2Error,
6365 ) -> MuxResult
6366 where
6367 E: Endpoint,
6368 L: ListenerHandler + L7ListenerHandler,
6369 {
6370 // Compute totals before taking mutable borrows on the target stream.
6371 let reset_byte_totals = self.compute_stream_byte_totals(context);
6372 context.unlink_stream(stream_id);
6373 let stream = &mut context.streams[stream_id];
6374 trace!(
6375 "{} reset H2 stream {}: {:#?}",
6376 log_context!(self),
6377 stream_id,
6378 stream.context
6379 );
6380 let old_state = std::mem::replace(&mut stream.state, StreamState::Unlinked);
6381 forcefully_terminate_answer(stream, &mut self.readiness, error);
6382 let linked_token = if let StreamState::Linked(token) = old_state {
6383 Some(token)
6384 } else {
6385 None
6386 };
6387 let (client_rtt, server_rtt) =
6388 Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
6389 if let Some(token) = linked_token {
6390 endpoint.end_stream(token, stream_id, context);
6391 }
6392 // Emit access log for server-side resets on streams that had active requests
6393 if self.position.is_server()
6394 && matches!(old_state, StreamState::Link | StreamState::Linked(_))
6395 {
6396 let stream = &mut context.streams[stream_id];
6397 self.distribute_overhead(&mut stream.metrics, reset_byte_totals);
6398 stream.metrics.backend_stop();
6399 stream.generate_access_log(
6400 true,
6401 Some("H2::Reset"),
6402 context.listener.clone(),
6403 client_rtt,
6404 server_rtt,
6405 );
6406 stream.metrics.reset();
6407 }
6408 // Queue the RST for wire emission. Independent of the owning stream
6409 // remaining in `self.streams` — callers typically follow this with
6410 // `remove_dead_stream`, which would otherwise evict the slot before
6411 // `write_streams` could run `kawa.prepare` against the converter.
6412 //
6413 // `enqueue_rst` performs every accounting side-effect at queue
6414 // time (per-error counter, global tx counter, CVE-2025-8671
6415 // MadeYouReset lifetime cap). Graceful `NoError` cancels —
6416 // stream recycle, propagated client-side cancel — are exempt
6417 // from the lifetime cap inside the accounting helper itself.
6418 if let Some(result) = self.enqueue_rst(wire_stream_id, error) {
6419 return result;
6420 }
6421 MuxResult::Continue
6422 }
6423
6424 pub fn end_stream<L>(&mut self, stream_gid: GlobalStreamId, context: &mut Context<L>)
6425 where
6426 L: ListenerHandler + L7ListenerHandler,
6427 {
6428 context.unlink_stream(stream_gid);
6429 let stream_context = context.http_context(stream_gid);
6430 trace!(
6431 "{} end H2 stream {}: {:#?}",
6432 log_context!(self),
6433 stream_gid,
6434 stream_context
6435 );
6436 match self.position {
6437 Position::Client(..) => {
6438 // Resolve the wire StreamId for this gid up front so the
6439 // subsequent cleanup does not hold an iterator borrow on
6440 // `self.streams` while also mutating it.
6441 let wire_stream_id = self
6442 .streams
6443 .iter()
6444 .find_map(|(&sid, &gid)| (gid == stream_gid).then_some(sid));
6445 if let Some(id) = wire_stream_id {
6446 // Only send RST_STREAM if the stream hasn't fully completed.
6447 // If both request and response are terminated, the stream is
6448 // already in "closed" state (RFC 9113 §5.1) — sending RST_STREAM
6449 // on a closed stream would be a protocol error that could cause
6450 // the H2 peer to close the entire connection.
6451 let stream = &context.streams[stream_gid];
6452 let fully_completed =
6453 stream.back_received_end_of_stream && stream.front.is_terminated();
6454 if !fully_completed && !self.rst_sent.contains(&id) {
6455 let kawa = &mut self.zero;
6456 let mut frame = [0; 13];
6457 if let Ok((_, _size)) =
6458 serializer::gen_rst_stream(&mut frame, id, H2Error::Cancel)
6459 {
6460 let buf = kawa.storage.space();
6461 if buf.len() >= frame.len() {
6462 buf[..frame.len()].copy_from_slice(&frame);
6463 kawa.storage.fill(frame.len());
6464 incr!(names::h2::FRAMES_TX_RST_STREAM);
6465 count!(metric_for_rst_stream_sent(H2Error::Cancel), 1);
6466 self.readiness.arm_writable();
6467 self.rst_sent.insert(id);
6468 }
6469 }
6470 }
6471 // Retire the stream and invalidate expect_write/expect_read
6472 // if they still reference this gid — the slot may be popped
6473 // by `shrink_trailing_recycle` on the next create_stream.
6474 self.remove_dead_stream(id, stream_gid);
6475 if context.streams[stream_gid].state != StreamState::Recycle {
6476 context.streams[stream_gid].state = StreamState::Unlinked;
6477 }
6478 return;
6479 }
6480 error!(
6481 "{} end_stream called for unknown global_stream_id {}",
6482 log_context!(self),
6483 stream_gid
6484 );
6485 }
6486 Position::Server => {
6487 let answers_rc = context.listener.borrow().get_answers().clone();
6488 let stream = &mut context.streams[stream_gid];
6489 match end_stream_decision(stream) {
6490 EndStreamAction::ForwardTerminated => {
6491 #[cfg(debug_assertions)]
6492 context
6493 .debug
6494 .push(DebugEvent::Str(format!("Close terminated {stream_gid}")));
6495 debug!(
6496 "{} CLOSING H2 TERMINATED STREAM {} {:?}",
6497 log_context!(self),
6498 stream_gid,
6499 stream
6500 );
6501 stream.state = StreamState::Unlinked;
6502 self.readiness.arm_writable();
6503 context.debug.set_interesting(true);
6504 }
6505 EndStreamAction::CloseDelimited => {
6506 debug!(
6507 "{} CLOSE DELIMITED H2 STREAM {} {:?}",
6508 log_context!(self),
6509 stream_gid,
6510 stream
6511 );
6512 stream.back.push_block(kawa::Block::Flags(kawa::Flags {
6513 end_body: true,
6514 end_chunk: false,
6515 end_header: false,
6516 end_stream: true,
6517 }));
6518 stream.back.parsing_phase = kawa::ParsingPhase::Terminated;
6519 stream.state = StreamState::Unlinked;
6520 self.readiness.arm_writable();
6521 context.debug.set_interesting(true);
6522 }
6523 EndStreamAction::ForwardUnterminated => {
6524 #[cfg(debug_assertions)]
6525 context
6526 .debug
6527 .push(DebugEvent::Str(format!("Close unterminated {stream_gid}")));
6528 debug!(
6529 "{} CLOSING H2 UNTERMINATED STREAM {} {:?}",
6530 log_context!(self),
6531 stream_gid,
6532 stream
6533 );
6534 forcefully_terminate_answer(
6535 stream,
6536 &mut self.readiness,
6537 H2Error::InternalError,
6538 );
6539 context.debug.set_interesting(true);
6540 }
6541 EndStreamAction::SendDefault(status) => {
6542 #[cfg(debug_assertions)]
6543 context.debug.push(DebugEvent::Str(format!(
6544 "Can't retry, send {status} on {stream_gid}"
6545 )));
6546 let answers = answers_rc.borrow();
6547 set_default_answer(stream, &mut self.readiness, status, &answers);
6548 }
6549 EndStreamAction::Reconnect => {
6550 debug!("{} H2 RECONNECT", log_context!(self));
6551 #[cfg(debug_assertions)]
6552 context
6553 .debug
6554 .push(DebugEvent::Str(format!("Retry {stream_gid}")));
6555 stream.state = StreamState::Link;
6556 context.pending_links.push_back(stream_gid);
6557 }
6558 }
6559 }
6560 }
6561 }
6562
6563 pub fn start_stream<L>(&mut self, stream: GlobalStreamId, _context: &mut Context<L>) -> bool
6564 where
6565 L: ListenerHandler + L7ListenerHandler,
6566 {
6567 // RFC 9113 §6.8: reject new streams on a draining connection
6568 if self.drain.draining {
6569 error!(
6570 "{} Cannot open new stream on draining connection (stream {})",
6571 log_context!(self),
6572 stream
6573 );
6574 return false;
6575 }
6576 // RFC 9113 §5.1.2: respect peer's max concurrent streams limit
6577 if self.streams.len() >= self.peer_settings.settings_max_concurrent_streams as usize {
6578 error!(
6579 "{} Cannot open new stream: active={} >= peer max_concurrent_streams={}",
6580 log_context!(self),
6581 self.streams.len(),
6582 self.peer_settings.settings_max_concurrent_streams
6583 );
6584 return false;
6585 }
6586 trace!(
6587 "{} start new H2 stream {} {:?}",
6588 log_context!(self),
6589 stream,
6590 self.readiness
6591 );
6592 let Some(stream_id) = self.new_stream_id() else {
6593 // Pass 4 Medium #5: the client-initiated stream-ID space
6594 // (31 bits, odd only) is exhausted. The backend is now useless
6595 // for new requests — gracefully drain it. Without this
6596 // transition, the Connection lingers in `Connected` state and
6597 // every subsequent request returns 503 because `start_stream`
6598 // keeps returning false.
6599 //
6600 // The session envelope is hoisted to a local because the
6601 // `match &mut self.position` below holds a mutable borrow on
6602 // `self.position`, and `log_context!(self)` reads that field
6603 // for its `position={...}` slot — calling the macro inside the
6604 // match arms would conflict with the active borrow. The
6605 // bidirectional regression guard in `lib/tests/log_layout.rs`
6606 // (and the matching scanner in `lib/build.rs`) recognises this
6607 // shape by scanning backward as well as forward from each log
6608 // call.
6609 let context = log_context!(self);
6610 match &mut self.position {
6611 Position::Client(cluster_id, backend, status) => {
6612 let backend_addr = backend.borrow().address;
6613 let cluster = cluster_id.clone();
6614 info!(
6615 "{} H2 backend stream IDs exhausted (cluster={}, backend={:?}) — draining",
6616 context, cluster, backend_addr
6617 );
6618 *status = BackendStatus::Disconnecting;
6619 }
6620 Position::Server => {
6621 error!(
6622 "{} H2 server stream IDs exhausted — sending graceful GOAWAY",
6623 context
6624 );
6625 }
6626 }
6627 self.graceful_goaway();
6628 return false;
6629 };
6630 self.streams.insert(stream_id, stream);
6631 self.stream_last_activity_at
6632 .insert(stream_id, Instant::now());
6633 self.readiness.arm_writable();
6634 true
6635 }
6636}
6637
6638#[cfg(test)]
6639mod tests {
6640 use std::{cell::RefCell, rc::Rc};
6641
6642 use super::*;
6643 use crate::{pool::Pool, protocol::kawa_h1::editor::HttpContext};
6644
6645 // ── H2FloodDetector ──────────────────────────────────────────────────
6646
#[test]
fn test_flood_detector_no_flood_below_threshold() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // A freshly built detector has every counter at zero: nothing to report.
    let fresh = detector.check_flood();
    assert!(fresh.is_none());

    // Bring each counter to its limit exactly, without going past it.
    detector.glitch_count = config.max_glitch_count;
    detector.continuation_count = config.max_continuation_frames;
    detector.empty_data_count = config.max_empty_data_per_window;
    detector.settings_count = config.max_settings_per_window;
    detector.ping_count = config.max_ping_per_window;
    detector.rst_stream_count = config.max_rst_stream_per_window;

    // Sitting on the limit is still tolerated.
    let at_limit = detector.check_flood();
    assert!(at_limit.is_none());
}
6665
#[test]
fn test_flood_detector_detects_rapid_reset() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One RST_STREAM past the per-window limit.
    detector.rst_stream_count = config.max_rst_stream_per_window + 1;

    let violation = detector
        .check_flood()
        .expect("RST_STREAM count above the per-window limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6680
#[test]
fn test_flood_detector_detects_ping_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One PING past the per-window limit.
    detector.ping_count = config.max_ping_per_window + 1;

    let violation = detector
        .check_flood()
        .expect("PING count above the per-window limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6695
#[test]
fn test_flood_detector_detects_settings_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One SETTINGS past the per-window limit.
    detector.settings_count = config.max_settings_per_window + 1;

    let violation = detector
        .check_flood()
        .expect("SETTINGS count above the per-window limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6710
#[test]
fn test_flood_detector_detects_empty_data_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One empty DATA frame past the per-window limit.
    detector.empty_data_count = config.max_empty_data_per_window + 1;

    let violation = detector
        .check_flood()
        .expect("empty DATA count above the per-window limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6725
#[test]
fn test_flood_detector_detects_continuation_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One CONTINUATION frame past the configured maximum.
    detector.continuation_count = config.max_continuation_frames + 1;

    let violation = detector
        .check_flood()
        .expect("CONTINUATION count above the limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6740
#[test]
fn test_flood_detector_detects_header_size_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One byte more than `MAX_HEADER_LIST_SIZE` accumulated.
    detector.accumulated_header_size = MAX_HEADER_LIST_SIZE as u32 + 1;

    let violation = detector
        .check_flood()
        .expect("accumulated header size above the limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6755
#[test]
fn test_flood_detector_detects_glitch_flood() {
    let config = H2FloodConfig::default();
    let mut detector = H2FloodDetector::new(config);

    // One glitch past the configured maximum.
    detector.glitch_count = config.max_glitch_count + 1;

    let violation = detector
        .check_flood()
        .expect("glitch count above the limit must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6770
#[test]
fn test_flood_detector_custom_thresholds() {
    let mut detector = H2FloodDetector::new(H2FloodConfig {
        max_rst_stream_per_window: 5,
        max_ping_per_window: 10,
        max_settings_per_window: 3,
        max_empty_data_per_window: 8,
        max_continuation_frames: 2,
        max_glitch_count: 15,
        ..H2FloodConfig::default()
    });

    // Exactly at the custom RST_STREAM threshold: still accepted.
    detector.rst_stream_count = 5;
    let at_threshold = detector.check_flood();
    assert!(at_threshold.is_none());

    // One past the custom threshold: reported as a flood.
    detector.rst_stream_count = 6;
    let violation = detector
        .check_flood()
        .expect("RST_STREAM count above the custom threshold must be a flood");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6798
#[test]
fn test_flood_detector_reset_continuation() {
    let mut detector = H2FloodDetector::new(H2FloodConfig::default());

    // Simulate a header block in progress.
    detector.accumulated_header_size = 30000;
    detector.continuation_count = 15;

    detector.reset_continuation();

    // Both header-block trackers are back to zero.
    assert_eq!(
        (
            detector.continuation_count,
            detector.accumulated_header_size
        ),
        (0, 0)
    );
}
6812
#[test]
fn test_flood_detector_half_decay_on_window_expiry() {
    let mut detector = H2FloodDetector::new(H2FloodConfig::default());

    detector.glitch_count = 50;
    detector.window_update_stream0_count = 90;
    detector.empty_data_count = 20;
    detector.settings_count = 40;
    detector.ping_count = 60;
    detector.rst_stream_count = 80;

    // Backdate the window start so the current window counts as expired.
    let expired_start = Instant::now() - FLOOD_WINDOW_DURATION;
    detector.window_start = expired_start;

    // `check_flood` goes through `maybe_reset_window`, which halves the
    // windowed counters. The verdict itself is irrelevant here.
    let _ = detector.check_flood();

    assert_eq!(detector.glitch_count, 25);
    assert_eq!(detector.window_update_stream0_count, 45);
    assert_eq!(detector.empty_data_count, 10);
    assert_eq!(detector.settings_count, 20);
    assert_eq!(detector.ping_count, 30);
    assert_eq!(detector.rst_stream_count, 40);
}
6838
#[test]
fn test_flood_detector_window_update_stream0_trips_at_threshold() {
    let mut detector = H2FloodDetector::new(H2FloodConfig {
        max_window_update_stream0_per_window: 5,
        ..H2FloodConfig::default()
    });

    // Sitting on the threshold is fine: the comparison is a strict
    // greater-than, like the other counters.
    detector.window_update_stream0_count = 5;
    let at_threshold = detector.check_flood();
    assert!(at_threshold.is_none());

    // One more trips the detector, with the matching reason and metric key.
    detector.window_update_stream0_count = 6;
    let Some(violation) = detector.check_flood() else {
        panic!("WINDOW_UPDATE stream-0 flood must trip above threshold");
    };
    assert_eq!(violation.threshold, 5);
    assert_eq!(violation.count, 6);
    assert_eq!(
        violation.metric_key,
        "h2.flood.violation.window_update_stream0_window"
    );
    assert_eq!(violation.reason, "WINDOW_UPDATE stream 0");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6865
#[test]
fn test_flood_detector_window_update_stream0_honours_default() {
    // The default threshold has to equal the documented constant, so that
    // operators can reason about behaviour without reading the code.
    let detector = H2FloodDetector::default();

    let configured = detector.config.max_window_update_stream0_per_window;
    assert_eq!(configured, DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW);

    // And the counter itself starts from zero.
    assert_eq!(detector.window_update_stream0_count, 0);
}
6877
#[test]
fn test_flood_detector_decay_prevents_flood() {
    let mut detector = H2FloodDetector::new(H2FloodConfig {
        max_rst_stream_per_window: 10,
        ..H2FloodConfig::default()
    });

    // 12 RSTs against a limit of 10, inside a live window: flood.
    detector.rst_stream_count = 12;
    let violation = detector
        .check_flood()
        .expect("12 RST_STREAM over a limit of 10 must be a flood without decay");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);

    // Same count, but with the window already expired.
    detector.rst_stream_count = 12;
    let expired_start = Instant::now() - FLOOD_WINDOW_DURATION;
    detector.window_start = expired_start;

    // Decay halves 12 down to 6, which is under the limit of 10.
    let after_decay = detector.check_flood();
    assert!(after_decay.is_none());
}
6905
#[test]
fn test_flood_detector_lifetime_rst_cap_triggers_enhance_your_calm() {
    // CVE-2023-44487 Rapid Reset: an attacker patient enough to stay under
    // the half-decaying per-window threshold must still hit the lifetime
    // cap. Every RST here is recorded as response-started, so the abusive
    // counter stays untouched and only the lifetime ceiling is exercised.
    let mut detector = H2FloodDetector::default();

    (0..DEFAULT_MAX_RST_STREAM_LIFETIME).for_each(|_| {
        assert!(detector.record_rst_lifetime(true).is_none());
    });
    assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
    assert_eq!(
        detector.total_rst_received_lifetime,
        DEFAULT_MAX_RST_STREAM_LIFETIME
    );

    // The very next RST goes over the ceiling.
    let violation = detector
        .record_rst_lifetime(true)
        .expect("crossing the lifetime RST cap must produce a violation");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6930
#[test]
fn test_flood_detector_abusive_rst_cap_triggers_first() {
    // RSTs received before the response started have a much lower ceiling
    // and trip long before the generic lifetime cap.
    let mut detector = H2FloodDetector::default();

    (0..DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME).for_each(|_| {
        assert!(detector.record_rst_lifetime(false).is_none());
    });
    assert_eq!(
        detector.total_abusive_rst_received_lifetime,
        DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME
    );

    // One more abusive RST goes over the ceiling.
    let violation = detector
        .record_rst_lifetime(false)
        .expect("crossing the abusive RST cap must produce a violation");
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
}
6951
#[test]
fn test_flood_detector_emitted_rst_below_threshold_is_clean() {
    // The server may legitimately reset some streams (protocol errors,
    // client-side abuse caught by other mitigations). Reaching the
    // threshold without exceeding it must not trip the ceiling.
    let mut detector = H2FloodDetector::default();

    (0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME).for_each(|_| {
        assert!(detector.record_rst_emitted().is_none());
    });

    let emitted = detector.total_rst_streams_emitted_lifetime;
    assert_eq!(emitted, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME);
}
6966
#[test]
fn test_flood_detector_emitted_rst_cap_triggers_made_you_reset() {
    // CVE-2025-8671 MadeYouReset: unbounded server-emitted RST_STREAM is a
    // DoS vector equivalent to Rapid Reset with the emission direction
    // flipped. Going over the ceiling must surface an EnhanceYourCalm
    // violation so the caller can GOAWAY.
    let mut detector = H2FloodDetector::default();

    (0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME).for_each(|_| {
        assert!(detector.record_rst_emitted().is_none());
    });

    let Some(violation) = detector.record_rst_emitted() else {
        panic!("emitting past the cap should produce a violation");
    };
    assert_eq!(violation.error, H2Error::EnhanceYourCalm);
    assert_eq!(
        violation.reason,
        "MadeYouReset: lifetime server-emitted RST_STREAM"
    );
    assert_eq!(violation.threshold, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME);
    assert_eq!(violation.count, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME + 1);
}
6991
#[test]
fn test_flood_detector_emitted_rst_counter_does_not_decay() {
    // The emitted lifetime counter, unlike the windowed rst_stream_count,
    // is strictly monotonic: waiting out a window must not reset it, so
    // maybe_reset_window must leave it alone.
    let mut detector = H2FloodDetector::default();
    for _ in 0..10 {
        let _ = detector.record_rst_emitted();
    }

    // Expire the window, then trigger the window reset via check_flood.
    let expired_start = Instant::now() - FLOOD_WINDOW_DURATION;
    detector.window_start = expired_start;
    let _ = detector.check_flood();

    assert_eq!(detector.total_rst_streams_emitted_lifetime, 10);
}
7006
/// Each kind of flood violation has to expose a `metric_key` that lives in
/// the `h2.flood.violation.*` namespace, and no two kinds may share a key.
/// The statsd counter at `handle_flood_violation` reads
/// `violation.metric_key` directly, so any drift between where a violation
/// is built and the metric name would silently lose alerting on a CVE
/// mitigation.
#[test]
fn test_flood_violation_metric_keys_are_unique_and_namespaced() {
    // Feed `record_rst_lifetime` until it reports a violation and hand back
    // that violation's metric_key.
    fn key_from_rst_lifetime(response_started: bool) -> &'static str {
        let mut detector = H2FloodDetector::default();
        loop {
            match detector.record_rst_lifetime(response_started) {
                Some(violation) => break violation.metric_key,
                None => continue,
            }
        }
    }

    // Feed `record_rst_emitted` until it reports a violation and hand back
    // that violation's metric_key.
    fn key_from_rst_emitted() -> &'static str {
        let mut detector = H2FloodDetector::default();
        loop {
            match detector.record_rst_emitted() {
                Some(violation) => break violation.metric_key,
                None => continue,
            }
        }
    }

    // Let `setup` push exactly one `check_flood` counter over its threshold,
    // then return the metric_key of the resulting violation.
    fn key_from_check_flood(setup: impl FnOnce(&mut H2FloodDetector)) -> &'static str {
        let mut detector = H2FloodDetector::default();
        setup(&mut detector);
        let violation = detector
            .check_flood()
            .expect("setup should always trip a flood");
        violation.metric_key
    }

    let keys: [&'static str; 12] = [
        // Violations raised by the lifetime-recording methods.
        key_from_rst_lifetime(true),
        key_from_rst_lifetime(false),
        key_from_rst_emitted(),
        // Violations raised by the individual `check_flood` arms.
        key_from_check_flood(|d| d.rst_stream_count = u32::MAX),
        key_from_check_flood(|d| d.ping_count = u32::MAX),
        key_from_check_flood(|d| d.total_ping_received_lifetime = u32::MAX),
        key_from_check_flood(|d| d.settings_count = u32::MAX),
        key_from_check_flood(|d| d.total_settings_received_lifetime = u32::MAX),
        key_from_check_flood(|d| d.empty_data_count = u32::MAX),
        key_from_check_flood(|d| d.continuation_count = u32::MAX),
        key_from_check_flood(|d| d.accumulated_header_size = u32::MAX),
        key_from_check_flood(|d| d.glitch_count = u32::MAX),
    ];

    // Namespace check.
    keys.iter().for_each(|key| {
        assert!(
            key.starts_with("h2.flood.violation."),
            "metric key {key} is missing the h2.flood.violation. prefix",
        );
    });

    // Uniqueness check: sorting then deduping must not shrink the set.
    let mut unique: Vec<&'static str> = keys.iter().copied().collect();
    unique.sort_unstable();
    unique.dedup();
    assert_eq!(
        unique.len(),
        keys.len(),
        "metric keys must be unique across violation kinds; collisions: {keys:?}",
    );
}
7076
/// `h2_frame_rx_metric_key` must yield a distinct `&'static str` per
/// `Frame::*` variant. The single dispatch site in `handle_frame` reads
/// from this helper, so a typo or duplicate would silently clobber the
/// frame-mix dashboard. Asserting the literal set lets us compare against
/// `doc/configure.md` and the RFC 9113 §6 frame catalogue without
/// reconstructing every Frame variant in the test.
#[test]
fn test_h2_frame_rx_metric_keys_are_unique_and_namespaced() {
    // Update this list whenever a new Frame variant is added — the helper
    // match is also exhaustive, so the build will already break there
    // before anyone notices the test missing a key.
    let expected: [&'static str; 11] = [
        "h2.frames.rx.data",
        "h2.frames.rx.headers",
        "h2.frames.rx.push_promise",
        "h2.frames.rx.priority",
        "h2.frames.rx.rst_stream",
        "h2.frames.rx.settings",
        "h2.frames.rx.ping",
        "h2.frames.rx.goaway",
        "h2.frames.rx.window_update",
        "h2.frames.rx.continuation",
        "h2.frames.rx.unknown",
    ];

    for key in expected {
        assert!(
            key.starts_with("h2.frames.rx."),
            "metric key {key} is missing the h2.frames.rx. prefix",
        );
    }
    // Sorting then deduping must not shrink the list if all keys are unique.
    let mut deduped = expected.to_vec();
    deduped.sort_unstable();
    deduped.dedup();
    assert_eq!(
        deduped.len(),
        expected.len(),
        "frame-rx metric keys must be unique; collisions in: {expected:?}",
    );

    // Spot-check the helper for the one variant we can construct without
    // borrowing into a frame body — `Frame::Unknown(u8)` is just a tag.
    assert_eq!(
        h2_frame_rx_metric_key(&Frame::Unknown(42)),
        "h2.frames.rx.unknown",
    );
}

/// All four `metric_for_*` helpers must yield distinct, namespaced keys for
/// every RFC 9113 §7 error code. The macro behind them uses `concat!`, so a
/// new H2Error variant fails the build inside the macro — but a typo in
/// the helper prefix would silently land. Walk every (direction × kind)
/// pair and dedupe the set.
#[test]
fn test_per_error_code_metric_keys_are_unique_and_namespaced() {
    const ALL_ERRORS: [H2Error; 14] = [
        H2Error::NoError,
        H2Error::ProtocolError,
        H2Error::InternalError,
        H2Error::FlowControlError,
        H2Error::SettingsTimeout,
        H2Error::StreamClosed,
        H2Error::FrameSizeError,
        H2Error::RefusedStream,
        H2Error::Cancel,
        H2Error::CompressionError,
        H2Error::ConnectError,
        H2Error::EnhanceYourCalm,
        H2Error::InadequateSecurity,
        H2Error::HTTP11Required,
    ];

    let mut keys: Vec<&'static str> = Vec::new();
    for error in ALL_ERRORS {
        // The sent-side helpers take the typed error, the received-side
        // helpers take the raw wire code.
        let code = error as u32;
        keys.push(metric_for_goaway_sent(error));
        keys.push(metric_for_goaway_received(code));
        keys.push(metric_for_rst_stream_sent(error));
        keys.push(metric_for_rst_stream_received(code));
    }
    // …plus the `unknown_error` fallbacks for codes outside RFC 9113 §7.
    // Only the two received-side helpers accept a raw code, so only they
    // can be driven with a value that has no `H2Error` variant.
    let unknown_code = 0xff;
    assert!(H2Error::try_from(unknown_code).is_err());
    keys.push(metric_for_goaway_received(unknown_code));
    keys.push(metric_for_rst_stream_received(unknown_code));
    // …and the dedicated Rapid Reset signature counter.
    keys.push(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START);

    for key in &keys {
        assert!(
            key.starts_with("h2.goaway.sent.")
                || key.starts_with("h2.goaway.received.")
                || key.starts_with("h2.rst_stream.sent.")
                || key.starts_with("h2.rst_stream.received."),
            "metric key {key} does not match a known per-error-code namespace",
        );
    }
    // Sorting then deduping must not shrink the list if all keys are unique.
    let mut deduped = keys.clone();
    deduped.sort_unstable();
    deduped.dedup();
    assert_eq!(
        deduped.len(),
        keys.len(),
        "per-error-code metric keys must be unique; collisions in: {keys:?}",
    );
}
7183
#[test]
fn test_flood_detector_response_started_rst_not_abusive() {
    // A RST that arrives once the backend response has begun is cheap for
    // us as well, so it may only bump the generic lifetime counter and
    // never the abusive one.
    let rst_total = DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100;
    let mut detector = H2FloodDetector::default();

    for _ in 0..rst_total {
        let verdict = detector.record_rst_lifetime(true);
        assert!(verdict.is_none());
    }

    assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
    assert_eq!(detector.total_rst_received_lifetime, rst_total);
}
7198
#[test]
fn test_flood_detector_default_matches_new_default() {
    // `Default` and `new(H2FloodConfig::default())` must agree on every
    // counter compared below as well as on the embedded config.
    let via_default = H2FloodDetector::default();
    let via_new = H2FloodDetector::new(H2FloodConfig::default());

    // Compare the same field on both detectors, one field at a time.
    macro_rules! assert_same_field {
        ($($field:ident),+ $(,)?) => {
            $( assert_eq!(via_default.$field, via_new.$field); )+
        };
    }

    assert_same_field!(
        rst_stream_count,
        ping_count,
        settings_count,
        empty_data_count,
        continuation_count,
        accumulated_header_size,
        glitch_count,
        config,
    );
}
7216
7217 // ── Prioriser ────────────────────────────────────────────────────────
7218
#[test]
fn test_prioriser_defaults_for_unknown_stream() {
    // Nothing was ever pushed, so every lookup must fall back to the
    // RFC 9218 defaults: urgency 3, incremental false.
    let prioriser = Prioriser::default();
    for unknown_stream in [1, 999] {
        assert_eq!(prioriser.get(&unknown_stream), (3, false));
    }
}
7226
#[test]
fn test_prioriser_push_rfc9218_and_get() {
    let mut prioriser = Prioriser::default();

    // Most urgent, incremental, on stream 1.
    let most_urgent = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };
    let rejected = prioriser.push_priority(1, most_urgent);
    assert!(!rejected);
    assert_eq!(prioriser.get(&1), (0, true));

    // Least urgent, non-incremental, on stream 3.
    let least_urgent = parser::PriorityPart::Rfc9218 {
        urgency: 7,
        incremental: false,
    };
    let rejected = prioriser.push_priority(3, least_urgent);
    assert!(!rejected);
    assert_eq!(prioriser.get(&3), (7, false));
}
7251
#[test]
fn test_prioriser_urgency_clamped_to_7() {
    // An urgency above 7 must be stored as 7.
    let out_of_range = parser::PriorityPart::Rfc9218 {
        urgency: 255,
        incremental: false,
    };

    let mut prioriser = Prioriser::default();
    prioriser.push_priority(1, out_of_range);

    assert_eq!(prioriser.get(&1), (7, false));
}
7265
#[test]
fn test_prioriser_update_priority() {
    let mut prioriser = Prioriser::default();

    // First signal for stream 1.
    let initial = parser::PriorityPart::Rfc9218 {
        urgency: 3,
        incremental: false,
    };
    prioriser.push_priority(1, initial);
    assert_eq!(prioriser.get(&1), (3, false));

    // A second signal for the same stream replaces the stored value.
    let replacement = parser::PriorityPart::Rfc9218 {
        urgency: 1,
        incremental: true,
    };
    prioriser.push_priority(1, replacement);
    assert_eq!(prioriser.get(&1), (1, true));
}
7289
#[test]
fn test_prioriser_remove() {
    let mut prioriser = Prioriser::default();
    let stored = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };

    prioriser.push_priority(1, stored);
    assert_eq!(prioriser.get(&1), (0, true));

    // Once the entry is removed the lookup falls back to the defaults.
    prioriser.remove(&1);
    assert_eq!(prioriser.get(&1), (3, false));
}
7307
#[test]
fn test_prioriser_rfc7540_self_dependency() {
    // Stream 5 declares a dependency on stream 5: push_priority must report
    // this as invalid by returning true.
    let depends_on_itself = parser::PriorityPart::Rfc7540 {
        stream_dependency: parser::StreamDependency {
            exclusive: false,
            stream_id: 5,
        },
        weight: 16,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority(5, depends_on_itself);

    assert!(invalid);
}
7325
#[test]
fn test_prioriser_rfc7540_valid_dependency() {
    // Stream 5 depends on a different stream (3): accepted as valid, yet
    // RFC 7540 priority is ignored for scheduling.
    let depends_on_other = parser::PriorityPart::Rfc7540 {
        stream_dependency: parser::StreamDependency {
            exclusive: false,
            stream_id: 3,
        },
        weight: 16,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority(5, depends_on_other);
    assert!(!invalid);

    // Since the RFC 7540 signal is ignored, the lookup still yields defaults.
    assert_eq!(prioriser.get(&5), (3, false));
}
7345
#[test]
fn test_prioriser_max_entries_cap() {
    let mut prioriser = Prioriser::default();
    let cap = MAX_PRIORITIES as u32;

    // Occupy MAX_PRIORITIES slots using consecutive odd stream IDs.
    (0..cap).for_each(|slot| {
        let part = parser::PriorityPart::Rfc9218 {
            urgency: (slot % 8) as u8,
            incremental: false,
        };
        prioriser.push_priority(slot * 2 + 1, part);
    });

    // One more, previously unseen stream: the insert is silently rejected.
    let overflow_id = cap * 2 + 1;
    let overflow_part = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };
    let invalid = prioriser.push_priority(overflow_id, overflow_part);

    assert!(!invalid); // dropped silently, not flagged as a protocol error
    assert_eq!(prioriser.get(&overflow_id), (3, false)); // nothing stored, defaults returned
}
7374
#[test]
fn test_prioriser_update_existing_at_cap() {
    let mut prioriser = Prioriser::default();

    // Bring the map up to its cap with odd stream IDs.
    let cap = MAX_PRIORITIES as u32;
    for slot in 0..cap {
        let part = parser::PriorityPart::Rfc9218 {
            urgency: 3,
            incremental: false,
        };
        prioriser.push_priority(slot * 2 + 1, part);
    }

    // Stream 1 already has an entry, so updating it must succeed even
    // though the map is full.
    let update = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };
    prioriser.push_priority(1, update);

    assert_eq!(prioriser.get(&1), (0, true));
}
7400
#[test]
fn test_prioriser_guarded_accepts_open_stream() {
    // Stream 3 is present in the open-stream map, so its priority signal
    // must be stored.
    let open: HashMap<StreamId, GlobalStreamId> = HashMap::from([(3, 0)]);
    let signal = parser::PriorityPart::Rfc9218 {
        urgency: 1,
        incremental: false,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority_guarded(3, signal, 7, &open);

    assert!(!invalid);
    assert_eq!(prioriser.get(&3), (1, false));
}
7418
#[test]
fn test_prioriser_guarded_accepts_idle_lookahead() {
    // Stream 105 is not open yet, but it sits just ahead of
    // last_stream_id (99), within PRIORITY_IDLE_LOOKAHEAD, so it is kept.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let signal = parser::PriorityPart::Rfc9218 {
        urgency: 2,
        incremental: true,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority_guarded(105, signal, 99, &no_open_streams);

    assert!(!invalid);
    assert_eq!(prioriser.get(&105), (2, true));
}
7436
#[test]
fn test_prioriser_guarded_drops_far_future_stream() {
    // Stream 1_000_001 lies beyond the 64-slot lookahead window when
    // last_stream_id is 3.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let signal = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: false,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority_guarded(1_000_001, signal, 3, &no_open_streams);

    // The signal is dropped without being treated as a protocol error...
    assert!(!invalid);
    // ...and, with no entry stored, the lookup yields the default priority.
    assert_eq!(prioriser.get(&1_000_001), (DEFAULT_URGENCY, false));
}
7455
#[test]
fn test_prioriser_guarded_drops_closed_past_stream() {
    // Stream 3 is behind the counter (99) and absent from the open map,
    // which means it has already been closed: the signal must be dropped.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let signal = parser::PriorityPart::Rfc9218 {
        urgency: 5,
        incremental: false,
    };

    let mut prioriser = Prioriser::default();
    let invalid = prioriser.push_priority_guarded(3, signal, 99, &no_open_streams);

    assert!(!invalid);
    assert_eq!(prioriser.get(&3), (DEFAULT_URGENCY, false));
}
7473
#[test]
fn test_prioriser_guarded_cannot_flood_with_far_ids() {
    // Before the guard existed, an attacker could fill all MAX_PRIORITIES
    // slots by choosing far-future stream IDs. The guard now rejects such
    // IDs before the cap ever comes into play.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let mut prioriser = Prioriser::default();

    let first_far_id = 10_000;
    let past_last_far_id = 10_000 + MAX_PRIORITIES as u32;
    (first_far_id..past_last_far_id).for_each(|far_stream_id| {
        let signal = parser::PriorityPart::Rfc9218 {
            urgency: 0,
            incremental: false,
        };
        prioriser.push_priority_guarded(far_stream_id, signal, 0, &no_open_streams);
    });

    assert_eq!(prioriser.priorities.len(), 0);
}
7493
7494 // ── RFC 9218 §4 round-robin rotation ───────────────────────────────
7495
7496 /// Helper: mark `stream_id` as (urgency, incremental) in the map.
7497 fn set_prio(p: &mut Prioriser, stream_id: StreamId, urgency: u8, incremental: bool) {
7498 p.push_priority(
7499 stream_id,
7500 parser::PriorityPart::Rfc9218 {
7501 urgency,
7502 incremental,
7503 },
7504 );
7505 }
7506
#[test]
fn test_apply_incremental_rotation_all_non_incremental_is_noop() {
    // With only non-incremental streams the existing (urgency, stream_id)
    // ordering must be left untouched.
    let mut prioriser = Prioriser::default();
    for stream_id in [1, 3, 5] {
        set_prio(&mut prioriser, stream_id, 3, false);
    }

    let mut order = vec![1u32, 3, 5];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 0);
    assert_eq!(order, vec![1, 3, 5]);
}
7520
#[test]
fn test_apply_incremental_rotation_moves_incremental_to_tail() {
    // Inside one urgency bucket the non-incremental streams are placed
    // ahead of the incremental ones; each sub-range stays ascending.
    let mut prioriser = Prioriser::default();
    for (stream_id, incremental) in [(1, true), (3, false), (5, true), (7, false)] {
        set_prio(&mut prioriser, stream_id, 3, incremental);
    }

    let mut order = vec![1u32, 3, 5, 7];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 2);
    // Expected layout: non-incremental (3, 7) followed by incremental
    // (1, 5), both ascending, before any cursor rotation applies.
    assert_eq!(order, vec![3, 7, 1, 5]);
}
7538
#[test]
fn test_apply_incremental_rotation_respects_urgency_buckets() {
    // Streams of different urgency must never be interleaved.
    let mut prioriser = Prioriser::default();
    let setup = [
        (1, 0, true),  // urgent, incremental
        (3, 3, false), // default urgency, non-incremental
        (5, 3, true),  // default urgency, incremental
        (7, 5, false), // low priority, non-incremental
    ];
    for (stream_id, urgency, incremental) in setup {
        set_prio(&mut prioriser, stream_id, urgency, incremental);
    }

    // The scheduler hands over a buffer already sorted by (urgency, id).
    let mut order = vec![1u32, 3, 5, 7];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 2);
    // Bucket 0 holds only [1]; bucket 3 holds [3] (non-inc) then [5] (inc);
    // bucket 5 holds only [7]. The order across buckets is unchanged.
    assert_eq!(order, vec![1, 3, 5, 7]);
}
7556
#[test]
fn test_apply_incremental_rotation_rotates_by_cursor() {
    // Three incremental streams share one urgency. Each time the cursor
    // advances, the bucket is rotated so the following pass begins right
    // after the stream that fired previously.
    let mut prioriser = Prioriser::default();
    for stream_id in [1, 3, 5] {
        set_prio(&mut prioriser, stream_id, 3, true);
    }

    let base = vec![1u32, 3, 5];

    // (expected order for the pass, stream that fires after the pass)
    let passes: [([u32; 3], Option<_>); 4] = [
        // Pass 1: cursor still at its initial 0, order unchanged.
        ([1, 3, 5], Some(1)),
        // Pass 2: cursor at 1, so 3 leads.
        ([3, 5, 1], Some(3)),
        // Pass 3: cursor at 3, so 5 leads.
        ([5, 1, 3], Some(5)),
        // Pass 4: cursor at 5 (largest ID in the bucket), wraps back to 1.
        ([1, 3, 5], None),
    ];

    for (expected, fired) in passes {
        let mut order = base.clone();
        assert_eq!(prioriser.apply_incremental_rotation(&mut order), 3);
        assert_eq!(order, expected);
        if let Some(stream_id) = fired {
            prioriser.advance_incremental_cursor(Some(stream_id));
        }
    }
}
7591
#[test]
fn test_apply_incremental_rotation_cursor_unknown_id() {
    // The cursor may reference an ID that is no longer active (its stream
    // completed). Rotation must then begin at the smallest ID that is
    // greater than the cursor.
    let mut prioriser = Prioriser::default();
    for stream_id in [3, 5, 7] {
        set_prio(&mut prioriser, stream_id, 3, true);
    }
    // 4 does not belong to the bucket.
    prioriser.advance_incremental_cursor(Some(4));

    let mut order = vec![3u32, 5, 7];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 3);
    assert_eq!(order, vec![5, 7, 3]);
}
7606
#[test]
fn test_apply_incremental_rotation_single_stream_buckets() {
    // Every bucket holds exactly one stream — the degenerate fast path —
    // so nothing may be reordered.
    let mut prioriser = Prioriser::default();
    for (stream_id, urgency, incremental) in [(1, 1, true), (3, 2, false), (5, 3, true)] {
        set_prio(&mut prioriser, stream_id, urgency, incremental);
    }

    let mut order = vec![1u32, 3, 5];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 2);
    assert_eq!(order, vec![1, 3, 5]);
}
7620
#[test]
fn test_advance_incremental_cursor_none_is_noop() {
    // When a pass served only non-incremental streams, no incremental
    // stream fired (None) and the cursor has to stay where it was so that
    // fairness carries over to the next pass.
    let mut prioriser = Prioriser::default();
    for fired in [Some(5), None] {
        prioriser.advance_incremental_cursor(fired);
    }
    assert_eq!(prioriser.incremental_cursor, 5);
}
7630
#[test]
fn test_apply_incremental_rotation_mixed_bucket_with_cursor() {
    // One urgency bucket mixing both kinds: the non-incremental streams are
    // served first in ascending order, then the incremental tail, which is
    // rotated according to the cursor.
    let mut prioriser = Prioriser::default();
    let setup = [(1, true), (3, false), (5, true), (7, false), (9, true)];
    for (stream_id, incremental) in setup {
        set_prio(&mut prioriser, stream_id, 3, incremental);
    }
    prioriser.advance_incremental_cursor(Some(5));

    let mut order = vec![1u32, 3, 5, 7, 9];
    let incremental_seen = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_seen, 3);
    // (3, 7) are non-incremental and lead. With the cursor at 5 the
    // incremental tail starts at the next ID after 5, i.e. 9, then wraps
    // around to 1 and finally 5.
    assert_eq!(order, vec![3, 7, 9, 1, 5]);
}
7650
7651 // ── H2FlowControl ───────────────────────────────────────────────────
7652
#[test]
fn test_flow_control_initial_state() {
    // A flow-control record built from the default initial window: nothing
    // received yet and no WINDOW_UPDATE queued.
    let flow_control = H2FlowControl {
        window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
        received_bytes_since_update: 0,
        pending_window_updates: HashMap::new(),
    };

    let H2FlowControl {
        window,
        received_bytes_since_update,
        pending_window_updates,
    } = &flow_control;

    assert_eq!(*window, 65535);
    assert_eq!(*received_bytes_since_update, 0);
    assert!(pending_window_updates.is_empty());
}
7664
#[test]
fn test_flow_control_window_update_coalescing() {
    let mut pending: HashMap<u32, u32> = HashMap::new();

    // Stream 1 receives its first increment.
    pending.insert(1, 1000);
    assert_eq!(pending[&1], 1000);

    // A further increment for stream 1 is folded into the existing entry,
    // saturating and capped at i32::MAX.
    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(500).min(i32::MAX as u32));
    assert_eq!(pending[&1], 1500);

    // Stream 3 is tracked independently under its own key.
    pending.insert(3, 2000);
    assert_eq!(pending.len(), 2);
    assert_eq!(pending[&3], 2000);
}
7684
#[test]
fn test_flow_control_window_update_saturation() {
    // Start 100 below the largest allowed increment, then coalesce 200 on
    // top: the stored value has to be capped at i32::MAX.
    let ceiling = i32::MAX as u32;
    let mut pending: HashMap<u32, u32> = HashMap::from([(1, ceiling - 100)]);

    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(200).min(ceiling));

    assert_eq!(pending[&1], ceiling);
}
7697
#[test]
fn test_flow_control_connection_window_can_go_negative() {
    // RFC 9113 §6.9.2 allows the connection-level window to become
    // negative, so the field must be able to represent that.
    let mut flow_control = H2FlowControl {
        window: 100,
        received_bytes_since_update: 0,
        pending_window_updates: HashMap::new(),
    };

    // Consume 200 while only 100 is available.
    let consumed = 200;
    flow_control.window -= consumed;

    assert_eq!(flow_control.window, -100);
}
7711
7712 // ── H2FloodConfig ───────────────────────────────────────────────────
7713
#[test]
fn test_flood_config_default_values() {
    // Pin the literal default of every threshold checked below.
    let config = H2FloodConfig::default();

    macro_rules! expect_default {
        ($field:ident, $value:expr) => {
            assert_eq!(config.$field, $value)
        };
    }

    expect_default!(max_rst_stream_per_window, 100);
    expect_default!(max_ping_per_window, 100);
    expect_default!(max_settings_per_window, 50);
    expect_default!(max_empty_data_per_window, 100);
    expect_default!(max_continuation_frames, 20);
    expect_default!(max_glitch_count, 100);
    expect_default!(max_rst_stream_lifetime, 10_000);
    expect_default!(max_rst_stream_abusive_lifetime, 50);
    expect_default!(max_header_list_size, MAX_HEADER_LIST_SIZE as u32);
}
7727
7728 // ── distribute_overhead ─────────────────────────────────────────────
7729
#[test]
fn test_distribute_overhead_proportional() {
    let mut metrics = SessionMetrics::new(None);
    let (mut overhead_bin, mut overhead_bout) = (1000, 500);

    // This stream carried 60% of all bytes and is not the last stream.
    let stream_bytes = (600, 300);
    let total_bytes = (1000, 500);
    let active_streams = 2;
    let is_last_stream = false;
    distribute_overhead(
        &mut metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        stream_bytes,
        total_bytes,
        active_streams,
        is_last_stream,
    );

    // The stream is charged 60% of 1000 inbound and 60% of 500 outbound.
    assert_eq!((metrics.bin, metrics.bout), (600, 300));
    // The pools shrink accordingly: 1000 - 600 and 500 - 300.
    assert_eq!((overhead_bin, overhead_bout), (400, 200));
}
7752
#[test]
fn test_distribute_overhead_even_split_when_no_bytes() {
    let mut metrics = SessionMetrics::new(None);
    let (mut overhead_bin, mut overhead_bout) = (100, 200);

    // Nothing was transferred at all, so the overhead is split evenly
    // between the active streams (this one is not the last).
    let stream_bytes = (0, 0);
    let total_bytes = (0, 0);
    let active_streams = 4;
    let is_last_stream = false;
    distribute_overhead(
        &mut metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        stream_bytes,
        total_bytes,
        active_streams,
        is_last_stream,
    );

    // One quarter each: 100 / 4 inbound, 200 / 4 outbound.
    assert_eq!((metrics.bin, metrics.bout), (25, 50));
    assert_eq!((overhead_bin, overhead_bout), (75, 150));
}
7775
#[test]
fn test_distribute_overhead_clamps_to_remaining() {
    let mut metrics = SessionMetrics::new(None);
    let (mut overhead_bin, mut overhead_bout) = (10, 10);

    // The (last) stream accounts for 100% of the bytes, yet only a small
    // amount of overhead is left to hand out.
    let stream_bytes = (1000, 1000);
    let total_bytes = (1000, 1000);
    let active_streams = 1;
    let is_last_stream = true;
    distribute_overhead(
        &mut metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        stream_bytes,
        total_bytes,
        active_streams,
        is_last_stream,
    );

    assert_eq!((metrics.bin, metrics.bout), (10, 10));
    assert_eq!((overhead_bin, overhead_bout), (0, 0));
}
7798
#[test]
fn test_distribute_overhead_zero_active_streams() {
    let mut metrics = SessionMetrics::new(None);
    let (mut overhead_bin, mut overhead_bout) = (100, 100);

    // Edge case: zero active streams. Being the last stream, this one
    // receives the whole remainder.
    let no_bytes = (0, 0);
    let active_streams = 0;
    let is_last_stream = true;
    distribute_overhead(
        &mut metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        no_bytes,
        no_bytes,
        active_streams,
        is_last_stream,
    );

    // Everything that was left goes to the last stream.
    assert_eq!((metrics.bin, metrics.bout), (100, 100));
    assert_eq!((overhead_bin, overhead_bout), (0, 0));
}
7821
#[test]
fn test_distribute_overhead_last_stream_gets_remainder() {
    let mut first_metrics = SessionMetrics::new(None);
    let mut last_metrics = SessionMetrics::new(None);
    let (mut overhead_bin, mut overhead_bout) = (120, 120);

    // Both calls describe one of three equal streams.
    let stream_bytes = (100, 100);
    let total_bytes = (300, 300);
    let active_streams = 3;

    // A stream that is not the last one takes its proportional share.
    distribute_overhead(
        &mut first_metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        stream_bytes,
        total_bytes,
        active_streams,
        false,
    );

    // Snapshot what is left in the pools before the final stream runs.
    let (remaining_bin, remaining_bout) = (overhead_bin, overhead_bout);

    // The last stream takes ALL of the remaining overhead, so rounding
    // cannot lose any bytes.
    distribute_overhead(
        &mut last_metrics,
        &mut overhead_bin,
        &mut overhead_bout,
        stream_bytes,
        total_bytes,
        active_streams,
        true,
    );

    assert_eq!(last_metrics.bin, remaining_bin);
    assert_eq!(last_metrics.bout, remaining_bout);
    assert_eq!(overhead_bin, 0, "no remainder bytes should be lost");
    assert_eq!(overhead_bout, 0, "no remainder bytes should be lost");
}
7859
7860 // ── H2FlowControl (additional edge cases) ─────────────────────────
7861
#[test]
fn test_flow_control_queue_window_update_cap() {
    // DEFAULT_MAX_PENDING_WINDOW_UPDATES must equal 1 + 4*MAX_CONCURRENT_STREAMS.
    assert_eq!(DEFAULT_MAX_PENDING_WINDOW_UPDATES, 1 + 100 * 4);

    // Model a queue that has been filled right up to the cap.
    let cap = DEFAULT_MAX_PENDING_WINDOW_UPDATES;
    let updates: HashMap<u32, u32> = (0..cap as u32)
        .map(|stream_id| (stream_id, 1000))
        .collect();
    assert_eq!(updates.len(), cap);

    // The first stream ID past the cap has no entry and the queue is full,
    // which is the condition under which a new entry would be rejected.
    let next_stream = cap as u32;
    assert!(updates.len() >= cap);
    assert!(!updates.contains_key(&next_stream));

    // The same formula with a custom max_concurrent_streams scales the cap.
    let custom_cap = 1 + 500_usize * 4;
    assert_eq!(custom_cap, 2001);
}
7885
#[test]
fn test_h2_connection_config_defaults() {
    // The default config uses the enlarged connection window, the default
    // concurrent-stream limit and a shrink ratio of 2.
    let defaults = H2ConnectionConfig::default();

    assert_eq!(defaults.initial_connection_window, ENLARGED_CONNECTION_WINDOW);
    assert_eq!(defaults.max_concurrent_streams, DEFAULT_MAX_CONCURRENT_STREAMS);
    assert_eq!(defaults.stream_shrink_ratio, 2);
}
7896
#[test]
fn test_h2_connection_config_clamp_window_lower_bound() {
    // A requested window of 100 is under the minimum and must be raised to
    // DEFAULT_INITIAL_WINDOW_SIZE (65535).
    let too_small_window = 100;
    let config = H2ConnectionConfig::new(too_small_window, 100, 2);

    assert_eq!(config.initial_connection_window, DEFAULT_INITIAL_WINDOW_SIZE);
}
7906
#[test]
fn test_h2_connection_config_clamp_window_upper_bound() {
    // u32::MAX exceeds the maximum and must be lowered to
    // FLOW_CONTROL_MAX_WINDOW (2^31-1).
    let too_large_window = u32::MAX;
    let config = H2ConnectionConfig::new(too_large_window, 100, 2);

    assert_eq!(config.initial_connection_window, FLOW_CONTROL_MAX_WINDOW);
}
7913
#[test]
fn test_h2_connection_config_clamp_window_exact_minimum() {
    // Requesting exactly the minimum must pass through unclamped, and must
    // not create a zero-increment WINDOW_UPDATE risk.
    let config = H2ConnectionConfig::new(DEFAULT_INITIAL_WINDOW_SIZE, 100, 2);
    let window = config.initial_connection_window;
    assert_eq!(window, DEFAULT_INITIAL_WINDOW_SIZE);

    // The increment that would be sent is 0; the code guards that case with
    // `if increment > 0`.
    let increment = window.saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
    assert_eq!(increment, 0);
}
7928
#[test]
fn test_h2_connection_config_clamp_shrink_ratio() {
    // Ratios below the minimum are raised to 2 (a ratio of 1 would defeat
    // recycling).
    for requested_ratio in [0, 1] {
        let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, requested_ratio);
        assert_eq!(config.stream_shrink_ratio, 2);
    }
}
7937
#[test]
fn test_h2_connection_config_clamp_concurrent_streams() {
    // A requested limit of 0 concurrent streams is raised to 1.
    let requested_streams = 0;
    let config = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, requested_streams, 2);

    assert_eq!(config.max_concurrent_streams, 1);
}
7943
#[test]
fn test_h2_connection_config_from_optional_uses_defaults() {
    // With every override absent, `from_optional` must equal `default()`.
    let without_overrides = H2ConnectionConfig::from_optional(None, None, None);

    assert_eq!(without_overrides, H2ConnectionConfig::default());
}
7950
#[test]
fn test_h2_connection_config_from_optional_overrides() {
    // Each supplied override must land in its matching field.
    let window_override = Some(2_000_000);
    let streams_override = Some(500);
    let ratio_override = Some(4);

    let config =
        H2ConnectionConfig::from_optional(window_override, streams_override, ratio_override);

    assert_eq!(config.initial_connection_window, 2_000_000);
    assert_eq!(config.max_concurrent_streams, 500);
    assert_eq!(config.stream_shrink_ratio, 4);
}
7958
#[test]
fn test_flow_control_window_settings_change_negative() {
    // RFC 9113 §6.9.2: changing SETTINGS_INITIAL_WINDOW_SIZE may drive the
    // flow-control window below zero.
    let mut flow_control = H2FlowControl {
        window: 100,
        received_bytes_since_update: 0,
        pending_window_updates: HashMap::new(),
    };

    // Model a reduction of SETTINGS_INITIAL_WINDOW_SIZE from 65535 to 10,
    // i.e. a delta of 10 - 65535 = -65525 applied to the window.
    let previous_initial: i32 = DEFAULT_INITIAL_WINDOW_SIZE as i32;
    let updated_initial: i32 = 10;
    flow_control.window += updated_initial - previous_initial;

    assert!(
        flow_control.window < 0,
        "Window must be able to go negative after settings change"
    );
    assert_eq!(flow_control.window, 100 + (10 - 65535));
}
7982
#[test]
fn test_flow_control_coalesce_saturates_at_max_increment() {
    // The entry already sits at the largest allowed increment; coalescing
    // another 1000 on top must leave it exactly there.
    let ceiling = i32::MAX as u32;
    let mut pending: HashMap<u32, u32> = HashMap::from([(1, ceiling)]);

    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(1000).min(ceiling));

    assert_eq!(pending[&1], ceiling);
}
7995
7996 // ── H2FloodConfig (additional) ───────────────────────────────────
7997
#[test]
fn test_flood_config_default_matches_constants() {
    // Each default threshold must be wired to its DEFAULT_* constant.
    let config = H2FloodConfig::default();

    macro_rules! expect_constant {
        ($field:ident, $constant:expr) => {
            assert_eq!(config.$field, $constant)
        };
    }

    expect_constant!(max_rst_stream_per_window, DEFAULT_MAX_RST_STREAM_PER_WINDOW);
    expect_constant!(max_ping_per_window, DEFAULT_MAX_PING_PER_WINDOW);
    expect_constant!(max_settings_per_window, DEFAULT_MAX_SETTINGS_PER_WINDOW);
    expect_constant!(max_empty_data_per_window, DEFAULT_MAX_EMPTY_DATA_PER_WINDOW);
    expect_constant!(max_continuation_frames, DEFAULT_MAX_CONTINUATION_FRAMES);
    expect_constant!(max_glitch_count, DEFAULT_MAX_GLITCH_COUNT);
}
8020
/// Two default flood configs compare equal; changing a single field makes
/// the comparison fail.
#[test]
fn test_flood_config_equality() {
    let baseline = H2FloodConfig::default();
    let twin = H2FloodConfig::default();
    assert_eq!(baseline, twin);

    // Differs from the baseline in exactly one field.
    let tightened = H2FloodConfig {
        max_rst_stream_per_window: 1,
        ..H2FloodConfig::default()
    };
    assert_ne!(baseline, tightened);
}
8033
8034 // ── distribute_overhead (additional edge cases) ───────────────────
8035
/// A non-final stream that carried all of the inbound bytes and none of the
/// outbound bytes must be charged the whole inbound overhead and none of
/// the outbound overhead.
#[test]
fn test_distribute_overhead_asymmetric_in_out() {
    let mut session = SessionMetrics::new(None);
    let mut pending_in = 1000;
    let mut pending_out = 1000;

    // This stream: 500 of 500 inbound bytes, 0 of 100 outbound bytes.
    let stream_bytes = (500, 0);
    let total_bytes = (500, 100);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        stream_bytes,
        total_bytes,
        2,     // active_streams
        false, // is_last_stream
    );

    // Inbound overhead fully attributed, outbound untouched.
    assert_eq!(session.bin, 1000);
    assert_eq!(session.bout, 0);
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 1000);
}
8058
/// Three equal streams settle against the same overhead pool in turn; the
/// last one is flagged `is_last_stream` and absorbs whatever is left, so
/// the whole overhead is distributed with no rounding residue.
#[test]
fn test_distribute_overhead_many_streams_accumulate() {
    let mut session = SessionMetrics::new(None);
    let mut pending_in = 120;
    let mut pending_out = 120;

    // Expected split of the 120-byte pool:
    //   1st call: 120 * 100/300 = 40 -> 80 left
    //   2nd call:  80 * 100/300 = 26 -> 54 left
    //   3rd call: last stream takes the remaining 54
    //   40 + 26 + 54 = 120
    for is_last_stream in [false, false, true] {
        distribute_overhead(
            &mut session,
            &mut pending_in,
            &mut pending_out,
            (100, 100), // stream_bytes
            (300, 300), // total_bytes
            3,          // active_streams
            is_last_stream,
        );
    }

    assert_eq!(session.bin, 120);
    assert_eq!(session.bout, 120);
    // Nothing is left in the pool once the final stream has settled.
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 0);
}
8090
8091 // ── Hex chunk formatting ────────────────────────────────────────────
8092
/// Checks that writing a length into a `Vec<u8>` with `write!` and the
/// `{:x}` specifier yields the same lowercase hex bytes as `format!("{:x}")`
/// (the original note ties this formatting to `handle_data_frame`).
#[test]
fn test_hex_chunk_length_formatting() {
    use std::io::Write as _;

    // (length, expected lowercase hex rendering)
    let table: &[(usize, &[u8])] = &[
        (1, b"1"),
        (15, b"f"),
        (16, b"10"),
        (255, b"ff"),
        (256, b"100"),
        (4096, b"1000"),
        (65535, b"ffff"),
        (65536, b"10000"),
    ];

    for (len, want) in table.iter().copied() {
        let mut rendered = Vec::<u8>::with_capacity(16);
        let _ = write!(rendered, "{len:x}");
        assert_eq!(
            rendered, want,
            "hex formatting mismatch for payload_len={len}"
        );
    }

    // `usize::MAX` is platform dependent, so its expectation is computed
    // with `format!` instead of being listed in the table.
    let reference = format!("{:x}", usize::MAX);
    let mut rendered = Vec::<u8>::with_capacity(16);
    let _ = write!(rendered, "{:x}", usize::MAX);
    assert_eq!(rendered, reference.as_bytes());
}
8125
8126 // ── Stream-ID allocation / exhaustion ──────────────────────────────────
8127
/// Starting from a watermark of `0` (fresh client connection), the first
/// allocation MUST hand out stream `1` — client-initiated streams are odd
/// per RFC 9113 §5.1.1 — and move the watermark to `2`.
#[test]
fn test_next_stream_id_client_first_allocation() {
    let allocation = next_stream_id(0, true);
    let (issued, watermark) = allocation.expect("fresh client must allocate");
    assert_eq!(issued, 1);
    assert_eq!(watermark, 2);
}
8137
/// Repeated client allocations must produce odd, strictly increasing
/// identifiers (1, 3, 5, ...), as RFC 9113 §5.1.1 requires.
#[test]
fn test_next_stream_id_client_sequence_is_odd_and_monotonic() {
    let mut watermark = 0u32;
    let mut seen: Vec<u32> = Vec::with_capacity(8);

    for _ in 0..8 {
        let (id, advanced) = next_stream_id(watermark, true).expect("unexhausted");
        assert_eq!(id & 1, 1, "client stream ids must be odd (RFC 9113 §5.1.1)");
        // Every id after the first must exceed its predecessor.
        if let Some(&previous) = seen.last() {
            assert!(id > previous);
        }
        seen.push(id);
        watermark = advanced;
    }

    assert_eq!(seen, vec![1, 3, 5, 7, 9, 11, 13, 15]);
}
8153
/// Server-side allocation yields even identifiers. Per the helper
/// convention, the issued id is the advanced watermark minus 2 for a
/// server and minus 1 for a client, so both sides share one even,
/// monotonically increasing watermark. Sōzu never server-pushes, but the
/// helper must stay symmetric so push could be enabled without a
/// regression.
#[test]
fn test_next_stream_id_server_is_even() {
    // With the watermark at 2 the call advances it to 4 and the server is
    // issued `4 - 2 = 2`. The shared watermark only matters in tests —
    // the server side never allocates in practice.
    let (first_id, watermark) = next_stream_id(2, false).expect("server allocation");
    assert_eq!(first_id & 1, 0, "server stream ids must be even");
    assert_eq!(watermark, 4);
    assert_eq!(first_id, 2);

    // A second allocation continues from the advanced watermark.
    let (second_id, watermark) = next_stream_id(watermark, false).expect("second slot");
    assert_eq!(second_id, 4);
    assert_eq!(second_id & 1, 0);
    assert_eq!(watermark, 6);
}
8174
/// `STREAM_ID_MAX` (`0x7FFF_FFFF`) is the last odd id a client may issue.
/// Issuing it requires the watermark to advance to `STREAM_ID_MAX + 1`, so
/// the caller passes `STREAM_ID_MAX - 1`. That call MUST succeed and return
/// the maximum id; the resulting watermark is the sentinel that makes the
/// following call fail.
#[test]
fn test_next_stream_id_client_final_slot_allocates() {
    let (issued, watermark) =
        next_stream_id(STREAM_ID_MAX - 1, true).expect("final slot still allocates");
    assert_eq!(issued, STREAM_ID_MAX);
    assert_eq!(watermark, STREAM_ID_MAX + 1);

    // One more request MUST be refused instead of wrapping around.
    let after_final = next_stream_id(watermark, true);
    assert!(after_final.is_none());
}
8189
/// Exhaustion: after the client has issued `STREAM_ID_MAX` the watermark
/// rests at `STREAM_ID_MAX + 1`. A further request MUST yield `None`
/// rather than an identifier above `STREAM_ID_MAX`, which would spill into
/// the reserved high bit of the 31-bit stream-id space.
#[test]
fn test_next_stream_id_client_exhausted_returns_none() {
    let exhausted_watermark = STREAM_ID_MAX + 1;
    let allocation = next_stream_id(exhausted_watermark, true);
    assert!(allocation.is_none());
}
8200
/// Defence in depth: if a caller ever supplies a watermark close to
/// `u32::MAX`, the helper must neither panic nor overflow — it must
/// report exhaustion with `None`.
#[test]
fn test_next_stream_id_saturates_near_u32_max() {
    for watermark in [u32::MAX, u32::MAX - 1] {
        assert!(next_stream_id(watermark, true).is_none());
    }
}
8209
/// Server-side counterpart of the exhaustion guard: with the watermark at
/// `STREAM_ID_MAX + 1`, the even-parity side must also get `None`.
#[test]
fn test_next_stream_id_server_exhausted_returns_none() {
    let exhausted_watermark = STREAM_ID_MAX + 1;
    let allocation = next_stream_id(exhausted_watermark, false);
    assert!(allocation.is_none());
}
8216
/// Regression guard against off-by-one errors: for every watermark within
/// four of `STREAM_ID_MAX` on either side, and for both parities, an issued
/// identifier must never exceed `STREAM_ID_MAX`.
#[test]
fn test_next_stream_id_never_exceeds_stream_id_max() {
    for last in (STREAM_ID_MAX - 4)..=(STREAM_ID_MAX + 4) {
        for is_client in [true, false] {
            // Exhausted watermarks legitimately yield nothing to check.
            let Some((issued, next)) = next_stream_id(last, is_client) else {
                continue;
            };

            assert!(
                issued <= STREAM_ID_MAX,
                "issued id {issued} exceeds STREAM_ID_MAX (last={last}, is_client={is_client})"
            );

            // The post-allocation watermark may rest at STREAM_ID_MAX + 1;
            // in that case the following call has to report exhaustion.
            if next > STREAM_ID_MAX {
                assert!(
                    next_stream_id(next, is_client).is_none(),
                    "second call after final slot must report exhaustion"
                );
            }
        }
    }
}
8242
/// The `is_client` flag must split the identifier space cleanly so a client
/// and a server on the same connection cannot collide: from the same
/// watermark, the client id is odd, the server id is even, and the two are
/// exactly one apart.
#[test]
fn test_next_stream_id_client_server_parities_disjoint() {
    let watermarks = [0u32, 2, 4, 10, 100, 1_000_000, STREAM_ID_MAX - 3];

    for watermark in watermarks {
        let (odd_id, _) = next_stream_id(watermark, true).unwrap();
        let (even_id, _) = next_stream_id(watermark, false).unwrap();

        assert_eq!(odd_id & 1, 1);
        assert_eq!(even_id & 1, 0);
        assert_eq!(odd_id.abs_diff(even_id), 1);
    }
}
8256
8257 // ── LIFECYCLE §9 invariant 16: any_stream_id_matches ─────────────────
8258 //
8259 // Covers the iteration dispatch used by `any_stream_has_pending_back`.
8260 // Testing the probe directly against a synthetic closure keeps the
8261 // tests independent of the full `Stream` fixture (which requires a
8262 // `Pool` and a fully-built `HttpContext`).
8263
/// With no streams registered, even an always-true probe cannot match.
#[test]
fn test_any_stream_id_matches_empty_map_is_false() {
    let no_streams = HashMap::<StreamId, GlobalStreamId>::new();
    let matched = any_stream_id_matches(&no_streams, |_| true);
    assert!(!matched);
}
8269
/// A probe that rejects every entry yields `false`, however many streams
/// the map holds.
#[test]
fn test_any_stream_id_matches_all_probe_false_is_false() {
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1), (5, 2)]);
    let matched = any_stream_id_matches(&streams, |_| false);
    assert!(!matched);
}
8278
/// A single accepted entry is enough for the helper to return `true`.
#[test]
fn test_any_stream_id_matches_any_probe_true_is_true() {
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1), (5, 2)]);

    // Only GlobalStreamId 1 (registered under StreamId 3) satisfies the probe.
    let matched = any_stream_id_matches(&streams, |gid| gid == 1);
    assert!(matched);
}
8288
/// With exactly one entry, the result follows the probe's verdict on that
/// entry's `GlobalStreamId`.
#[test]
fn test_any_stream_id_matches_single_entry() {
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(42, 7)]);

    let hit = any_stream_id_matches(&streams, |gid| gid == 7);
    let miss = any_stream_id_matches(&streams, |gid| gid == 8);

    assert!(hit);
    assert!(!miss);
}
8296
/// The helper must stop probing as soon as one entry matches: with an
/// always-true probe over four entries, the probe runs exactly once.
#[test]
fn test_any_stream_id_matches_short_circuits() {
    let streams: HashMap<StreamId, GlobalStreamId> =
        HashMap::from([(1, 0), (3, 1), (5, 2), (7, 3)]);

    let mut probe_invocations = 0usize;
    let matched = any_stream_id_matches(&streams, |_| {
        probe_invocations += 1;
        true
    });

    assert!(matched);
    // `Iterator::any` returns on the first `true`, so one invocation is
    // all this setup may produce.
    assert_eq!(probe_invocations, 1);
}
8314
8315 // ── cumulative-stall budget decision (fc_stall_budget_decision) ──
8316
/// When the send window is not blocked the decision is always `Clear`,
/// whatever this pass drained and whatever progress was accumulated before.
#[test]
fn test_fc_stall_budget_open_window_always_clears() {
    // (bytes consumed this pass, previously accumulated progress)
    let scenarios = [
        (0, None),
        (1, Some(5)),
        (i32::MAX, Some(FC_STALL_CLEAR_FLOOR)),
    ];

    for (consumed, prior_progress) in scenarios {
        assert_eq!(
            fc_stall_budget_decision(false, consumed, prior_progress),
            FcStallAction::Clear
        );
    }
}
8334
/// While blocked and below the floor, the decision is `Arm` carrying the
/// prior progress plus this pass's drain.
#[test]
fn test_fc_stall_budget_blocked_arms_and_accumulates() {
    // (consumed this pass, prior progress, expected accumulated progress)
    let scenarios = [
        // First blocked pass: armed with just this pass's drain.
        (1, None, 1),
        // Nothing drained: the accumulator is carried over unchanged, so
        // the deadline keeps aging (a window-0 stall gives consumed == 0).
        (0, Some(42), 42),
        // A negative `consumed` counts as 0 (defensive; per the original
        // note, converter.window only shrinks so consumed is >= 0).
        (-10, Some(7), 7),
    ];

    for (consumed, prior_progress, expected) in scenarios {
        assert_eq!(
            fc_stall_budget_decision(true, consumed, prior_progress),
            FcStallAction::Arm { progress: expected }
        );
    }
}
8355
/// Boundary behaviour around `FC_STALL_CLEAR_FLOOR` while blocked: reaching
/// the floor clears the deadline, one byte short of it still arms.
#[test]
fn test_fc_stall_budget_floor_clears() {
    let floor = FC_STALL_CLEAR_FLOOR;
    let just_below = FC_STALL_CLEAR_FLOOR - 1;

    // A single pass that drains the whole floor clears the deadline.
    let at_floor = fc_stall_budget_decision(true, floor as i32, None);
    assert_eq!(at_floor, FcStallAction::Clear);

    // One byte short of the floor keeps the deadline armed.
    let below_floor = fc_stall_budget_decision(true, just_below as i32, None);
    assert_eq!(
        below_floor,
        FcStallAction::Arm {
            progress: just_below
        }
    );

    // Earlier progress combined with this pass reaches the floor: clear.
    let crossing = fc_stall_budget_decision(true, 1, Some(just_below));
    assert_eq!(crossing, FcStallAction::Clear);
}
8377
/// WINDOW_UPDATE(+1) drip: draining one byte per blocked pass must keep the
/// deadline armed, with the accumulator growing by one each pass, until the
/// pass that reaches `FC_STALL_CLEAR_FLOOR`. Only that pass may clear. This
/// is the unit-level proof that a drip granting fewer than floor bytes per
/// idle period cannot keep a stalled stream alive.
#[test]
fn test_fc_stall_budget_wu_drip_ages_until_floor() {
    let mut accumulated: Option<usize> = None;

    for pass in 1..FC_STALL_CLEAR_FLOOR {
        let FcStallAction::Arm { progress: total } =
            fc_stall_budget_decision(true, 1, accumulated)
        else {
            panic!("drip cleared the deadline early at pass {pass}");
        };
        assert_eq!(total, pass, "drip accumulator off at pass {pass}");
        accumulated = Some(total);
    }

    // The next single byte lands exactly on the floor and clears.
    let final_pass = fc_stall_budget_decision(true, 1, accumulated);
    assert_eq!(final_pass, FcStallAction::Clear);
}
8401
8402 // ── flow-control-stall reaper union (collect_timed_out_streams) ──
8403
/// A window-stalled stream MUST be reaped on the flow-control-stall
/// deadline even when its liveness timestamp is fresh: an inbound 1-byte
/// DATA drip keeps `last_activity` warm but never touches `fc_stalled`.
/// Without the `fc_stalled` check such a stream would never be reaped.
#[test]
fn test_collect_timed_out_streams_reaps_fc_stall_despite_fresh_liveness() {
    let now = Instant::now();
    let deadline = std::time::Duration::from_secs(2);
    let stalled_since = now - std::time::Duration::from_secs(5);

    let live = HashMap::from([(7u32, 0usize)]);
    let rst_sent = HashSet::new();
    // Liveness is fresh: an inbound DATA drip was just received.
    let last_activity = HashMap::from([(7u32, now)]);
    // The stall began well before the 2-second deadline.
    let fc_stalled = HashMap::from([(7u32, stalled_since)]);

    let reaped =
        collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline);
    assert_eq!(reaped, vec![(7u32, "H2::WindowStall")]);
}
8424
/// Exercises the filters in one pass: idle live streams are reported, fresh
/// ones survive, a stream that is both idle and stalled is reported once
/// (as idle), and streams already being reset or no longer live are left out.
#[test]
fn test_collect_timed_out_streams_idle_dedup_and_filters() {
    let now = Instant::now();
    let deadline = std::time::Duration::from_secs(2);
    let stale = now - std::time::Duration::from_secs(5);

    let live: HashMap<_, _> = [1u32, 3, 5, 9]
        .into_iter()
        .map(|sid| (sid, 0usize))
        .collect();
    // Stream 9 is already being reset, so it must be skipped.
    let rst_sent = HashSet::from([9u32]);
    let last_activity = HashMap::from([
        (1u32, stale),  // idle past the deadline
        (3u32, now),    // fresh, survives
        (5u32, stale),  // idle and fc-stalled, reported once
        (9u32, stale),  // idle but in rst_sent, excluded
        (11u32, stale), // not a live stream, excluded
    ]);
    let fc_stalled = HashMap::from([(5u32, stale)]);

    let mut reaped =
        collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline);
    // Map iteration order is unspecified; sort before comparing.
    reaped.sort_unstable();
    assert_eq!(
        reaped,
        vec![(1u32, "H2::IdleTimeout"), (5u32, "H2::IdleTimeout")]
    );
}
8452
/// When both the liveness and the stall timestamps are current, no stream
/// is reported.
#[test]
fn test_collect_timed_out_streams_empty_when_all_fresh() {
    let now = Instant::now();
    let deadline = std::time::Duration::from_secs(2);

    let live = HashMap::from([(1u32, 0usize)]);
    let rst_sent = HashSet::new();
    let last_activity = HashMap::from([(1u32, now)]);
    let fc_stalled = HashMap::from([(1u32, now)]);

    let reaped =
        collect_timed_out_streams(&last_activity, &fc_stalled, &live, &rst_sent, now, deadline);
    assert!(reaped.is_empty());
}
8469
8470 // ── LIFECYCLE §9 invariant 16: any_stream_has_pending_back ───────────
8471
8472 /// Build a minimal `Stream` for invariant-16 probing. Uses the pool
8473 /// plumbing so `back.blocks` / `back.out` exist; every other field is
8474 /// default-valued because the predicate only reads the back buffer.
8475 fn make_stream_for_invariant_16(pool: &Rc<RefCell<Pool>>, session_ulid: Ulid) -> Stream {
8476 let http_ctx = HttpContext {
8477 keep_alive_backend: true,
8478 keep_alive_frontend: true,
8479 sticky_session_found: None,
8480 method: None,
8481 authority: None,
8482 path: None,
8483 status: None,
8484 reason: None,
8485 user_agent: None,
8486 x_request_id: None,
8487 xff_chain: None,
8488 #[cfg(feature = "opentelemetry")]
8489 otel: None,
8490 closing: false,
8491 session_id: session_ulid,
8492 id: Ulid::generate(),
8493 backend_id: None,
8494 cluster_id: None,
8495 protocol: Protocol::HTTPS,
8496 public_address: "127.0.0.1:0".parse().unwrap(),
8497 session_address: None,
8498 sticky_name: String::new(),
8499 sticky_session: None,
8500 backend_address: None,
8501 tls_server_name: None,
8502 tls_cert_names: None,
8503 strict_sni_binding: false,
8504 elide_x_real_ip: false,
8505 send_x_real_ip: false,
8506 tls_version: None,
8507 tls_cipher: None,
8508 tls_alpn: None,
8509 sozu_id_header: String::from("Sozu-Id"),
8510 redirect_location: None,
8511 www_authenticate: None,
8512 original_authority: None,
8513 headers_response: Vec::new(),
8514 retry_after_seconds: None,
8515 frontend_redirect_template: None,
8516 redirect_status: None,
8517 access_log_message: None,
8518 };
8519 Stream::new(Rc::downgrade(pool), http_ctx, 65_535)
8520 .expect("pool should have capacity for two buffers")
8521 }
8522
8523 fn make_pool_for_invariant_16() -> Rc<RefCell<Pool>> {
8524 // Two buffer slots per stream (front + back), ten stream slots is
8525 // plenty for the tests below.
8526 Rc::new(RefCell::new(Pool::with_capacity(4, 20, 16_384)))
8527 }
8528
/// With an empty stream map the predicate is `false`, even though the
/// backing slice holds a stream.
#[test]
fn test_any_stream_has_pending_back_empty_map_is_false() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();

    let context_streams = vec![make_stream_for_invariant_16(&pool, session)];
    let no_mappings = HashMap::<StreamId, GlobalStreamId>::new();

    assert!(!any_stream_has_pending_back(&no_mappings, &context_streams));
}
8537
/// Two mapped streams that were just built and never written to give a
/// `false` result.
#[test]
fn test_any_stream_has_pending_back_all_drained_is_false() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();

    let first = make_stream_for_invariant_16(&pool, session);
    let second = make_stream_for_invariant_16(&pool, session);
    let context_streams = vec![first, second];

    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1)]);

    // Freshly built streams start with empty `back.out` and `back.blocks`
    // (per the original note, `Kawa::new` starts with empty deques).
    assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
}
8553
/// LIFECYCLE invariant 16, defence in depth: a `GlobalStreamId` that does
/// not index into the stream slice (as can happen during a stream-removal
/// race) must not panic; the lookup has to resolve to `false`.
#[test]
fn test_any_stream_has_pending_back_unknown_gid_is_false() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();
    let context_streams = vec![make_stream_for_invariant_16(&pool, session)];

    // GlobalStreamId 42 lies outside the one-element slice built above.
    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(7, 42)]);

    assert!(!any_stream_has_pending_back(&streams_map, &context_streams));
}
8567
/// A mapped stream with a queued block in `back.blocks` makes the predicate
/// return `true`.
#[test]
fn test_any_stream_has_pending_back_with_pending_blocks_is_true() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();

    let mut pending_stream = make_stream_for_invariant_16(&pool, session);
    // The block variant is arbitrary: the predicate only checks
    // `blocks.is_empty()`.
    pending_stream.back.blocks.push_back(kawa::Block::StatusLine);

    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0)]);
    let context_streams = [pending_stream];

    assert!(any_stream_has_pending_back(&streams_map, &context_streams));
}
8580
/// A mapped stream with bytes waiting in `back.out` (and no blocks) makes
/// the predicate return `true`.
#[test]
fn test_any_stream_has_pending_back_with_pending_out_is_true() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();

    let mut pending_stream = make_stream_for_invariant_16(&pool, session);
    // Queue output bytes directly, leaving `blocks` empty.
    let leftover = kawa::OutBlock::Store(kawa::Store::Static(b"partial frame"));
    pending_stream.back.out.push_back(leftover);

    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0)]);
    let context_streams = [pending_stream];

    assert!(any_stream_has_pending_back(&streams_map, &context_streams));
}
8595
8596 // ── ready_incremental_by_urgency mid-pass consistency ────────────────
8597 //
8598 // The full RED is in e2e and currently #[ignore]'d (timing-sensitive).
8599 // The scalar logic below pins the saturating_sub + bucket-scoped
8600 // decrement contract the scheduler at h2.rs:2412-2414 + h2.rs:2481
8601 // relies on: a same-urgency transition-to-ineligible MUST drop the
8602 // per-bucket count by exactly 1 and never underflow the u64.
8603
/// Test fixture: builds an urgency -> count map from `(urgency, count)`
/// pairs. When an urgency appears more than once, the last pair wins.
fn make_bucket(counts: &[(u8, usize)]) -> HashMap<u8, usize> {
    let mut buckets = HashMap::with_capacity(counts.len());
    for &(urgency, ready) in counts {
        buckets.insert(urgency, ready);
    }
    buckets
}
8607
/// An incremental stream leaving the eligible set lowers its own urgency
/// bucket by one and leaves the other buckets unchanged.
#[test]
fn ready_incremental_bucket_decrement_reduces_same_urgency_only() {
    let mut buckets = make_bucket(&[(1, 3), (3, 2)]);
    let (urgency, is_incremental) = (1u8, true);

    // Model a stream at urgency 1 becoming ineligible mid-pass: only
    // incremental streams touch their bucket.
    if let Some(ready) = buckets.get_mut(&urgency).filter(|_| is_incremental) {
        *ready = ready.saturating_sub(1);
    }

    assert_eq!(buckets.get(&1), Some(&2), "urgency-1 bucket must drop to 2");
    assert_eq!(buckets.get(&3), Some(&2), "urgency-3 bucket untouched");
}
8622
/// Decrementing a bucket that already holds zero must leave it at zero
/// instead of wrapping around.
#[test]
fn ready_incremental_bucket_decrement_saturates_at_zero() {
    let urgency = 0u8;
    let mut buckets = make_bucket(&[(0, 0)]);

    if let Some(ready) = buckets.get_mut(&urgency) {
        *ready = ready.saturating_sub(1);
    }

    assert_eq!(buckets.get(&0), Some(&0), "saturating_sub must not underflow");
}
8632
/// A non-incremental stream going ineligible must leave the per-urgency
/// counter as it was.
#[test]
fn ready_incremental_bucket_decrement_skipped_for_non_incremental() {
    let mut buckets = make_bucket(&[(1, 3)]);
    let is_incremental = false;

    // The decrement is gated on the incremental flag, so nothing happens.
    if let Some(ready) = buckets.get_mut(&1).filter(|_| is_incremental) {
        *ready = ready.saturating_sub(1);
    }

    assert_eq!(
        buckets.get(&1),
        Some(&3),
        "non-incremental transitions must not touch the bucket"
    );
}
8648
8649 // ── enqueue_rst: queue / dedupe / counter / arm invariants ───────────
8650 //
8651 // `enqueue_rst_into` is the free-function primitive shared by all three
8652 // RST push sites (DATA-on-closed, refuse_stream_and_discard,
8653 // reset_stream). The method delegates; the invariants live here.
8654
/// Queuing a reset twice for the same stream must leave one queued frame
/// (the first error wins), bump the counter once, record the id in the
/// sent set, and report `true` then `false`.
#[test]
fn test_enqueue_rst_into_populates_queue_and_dedupe() {
    let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
    let mut total: usize = 0;
    let mut sent: HashSet<StreamId> = HashSet::new();
    let mut readiness = Readiness::new();

    // Every push in this test targets stream 5; only the error code varies.
    let mut push_rst = |error| {
        enqueue_rst_into(
            &mut pending,
            &mut total,
            &mut sent,
            &mut readiness,
            5,
            error,
        )
    };

    let first = push_rst(H2Error::ProtocolError);
    assert!(first, "first call must report freshly_queued = true");

    // A repeat for the same stream must change nothing AND return false,
    // so the accounting in `Self::enqueue_rst` can skip it.
    let second = push_rst(H2Error::InternalError);
    assert!(
        !second,
        "second call for same stream must return freshly_queued = false"
    );

    assert_eq!(pending.len(), 1, "dedupe must collapse to a single entry");
    assert_eq!(
        pending[0],
        (5, H2Error::ProtocolError),
        "the first error wins — second push is ignored"
    );
    assert_eq!(total, 1, "queued-cap counter must bump exactly once");
    assert!(sent.contains(&5), "rst_sent must record the id");
}
8695
/// Four different stream ids produce four queued frames, four counter
/// bumps, and four entries in the sent set.
#[test]
fn test_enqueue_rst_into_bumps_total_for_distinct_ids() {
    let mut queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total: usize = 0;
    let mut already_sent: HashSet<StreamId> = HashSet::new();
    let mut readiness = Readiness::new();

    let stream_ids = [1u32, 3, 5, 7];
    for stream_id in stream_ids {
        enqueue_rst_into(
            &mut queue,
            &mut queued_total,
            &mut already_sent,
            &mut readiness,
            stream_id,
            H2Error::ProtocolError,
        );
    }

    assert_eq!(queue.len(), 4);
    assert_eq!(queued_total, 4);
    assert_eq!(already_sent.len(), 4);
}
8718
/// Invariant 15: queuing a reset must raise WRITABLE on both `interest` and
/// `event`, so that the next tick runs `writable()` under edge-triggered
/// epoll.
#[test]
fn test_enqueue_rst_into_arms_writable_in_invariant_15_form() {
    let mut queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total: usize = 0;
    let mut already_sent: HashSet<StreamId> = HashSet::new();
    let mut readiness = Readiness::new();

    // Before: neither WRITABLE bit is set on a new `Readiness`.
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());

    enqueue_rst_into(
        &mut queue,
        &mut queued_total,
        &mut already_sent,
        &mut readiness,
        9,
        H2Error::FlowControlError,
    );

    // After: both bits are raised.
    assert!(
        readiness.interest.is_writable(),
        "arm_writable must raise the interest bit"
    );
    assert!(
        readiness.event.is_writable(),
        "arm_writable must raise the event bit (edge-triggered epoll)"
    );
}
8751
/// Deduplication is a pure short-circuit: when the stream id is already in
/// the sent set, nothing is queued, the counter stays put and the readiness
/// is left alone. Otherwise a re-entrant `reset_stream` during a cascading
/// error path would re-raise WRITABLE for no reason — harmless, but noisy
/// in metrics.
#[test]
fn test_enqueue_rst_into_dedupe_does_not_rearm_writable() {
    let mut queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total: usize = 0;
    // Stream 11 has already had its reset recorded.
    let mut already_sent: HashSet<StreamId> = HashSet::from([11]);
    let mut readiness = Readiness::new();

    enqueue_rst_into(
        &mut queue,
        &mut queued_total,
        &mut already_sent,
        &mut readiness,
        11,
        H2Error::ProtocolError,
    );

    assert!(
        queue.is_empty(),
        "already-sent ids must not queue a second frame"
    );
    assert_eq!(queued_total, 0);
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());
}
8782
8783 // ── forcefully_terminate_answer arms WRITABLE for ET epoll ───────────
8784 //
8785 // Gap A in the h2spec diagnosis: the pre-fix code set `interest` but
8786 // never raised `event`, so `filter_interest() = event & interest` was
8787 // zero and `writable()` was never scheduled. This test pins the fix.
8788
/// `forcefully_terminate_answer` must raise WRITABLE on both `interest` and
/// `event`. With only `interest` set, `event & interest` stays zero under
/// edge-triggered epoll and `writable()` is never scheduled.
#[test]
fn test_forcefully_terminate_answer_arms_event_and_interest() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();
    let mut stream = make_stream_for_invariant_16(&pool, session);
    let mut readiness = Readiness::new();

    // Before: a new `Readiness` has no WRITABLE bit set.
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());

    forcefully_terminate_answer(&mut stream, &mut readiness, H2Error::ProtocolError);

    // After: both bits are raised.
    assert!(
        readiness.interest.is_writable(),
        "forcefully_terminate_answer must set the WRITABLE interest bit"
    );
    assert!(
        readiness.event.is_writable(),
        "forcefully_terminate_answer must set the WRITABLE event bit — \
         without this, filter_interest() = 0 under edge-triggered epoll \
         and writable() is never scheduled (h2spec Gap A)"
    );
}
8812}