sozu_lib/protocol/mux/h2.rs
1//! H2 mux connection wrapper (RFC 9113).
2//!
3//! Owns wire-side connection state: HPACK encoder/decoder, peer settings,
4//! flow window, GOAWAY/RST attribution, and the [`H2FloodDetector`] backing
5//! the CVE-2023-44487 / CVE-2024-27316 / CVE-2025-8671 mitigations. Stream
6//! storage lives in the sibling `Context<L>` (`mux/mod.rs`); this module is
7//! the canonical home for the edge-trigger discipline — paths that queue
8//! bytes for a later event-loop pass must arm writable / signal pending
9//! write (cf. `arm_writable()` at the deferred-control-frame sites and
10//! `lib/src/lib.rs:1006`-`1010`).
11
12use std::{
13 cmp::min,
14 collections::{HashMap, HashSet},
15 io::{IoSlice, Write as _},
16 time::{Duration, Instant},
17};
18
/// Build-time check that `usize` is at least 32 bits wide.
///
/// The H2 parser performs `payload_len as usize` casts; on a target with
/// narrower pointers (e.g. a 16-bit embedded platform) those casts would
/// truncate silently. Failing the build here rules that out.
const _: () = assert!(
    usize::BITS >= 32,
    "sozu requires at least 32-bit pointers"
);
26
27use rusty_ulid::Ulid;
28use sozu_command::{logging::ansi_palette, ready::Ready};
29
30use crate::metrics::names;
31use crate::{
32 L7ListenerHandler, ListenerHandler, Protocol, Readiness, SessionMetrics,
33 protocol::mux::{
34 BackendStatus, Context, DebugEvent, DebugHistory, Endpoint, GenericHttpStream,
35 GlobalStreamId, MuxResult, Position, Stream, StreamId, StreamState, converter,
36 forcefully_terminate_answer,
37 parser::{self, Frame, FrameHeader, FrameType, H2Error, Headers, WindowUpdate},
38 pkawa, remove_backend_stream, serializer, set_default_answer,
39 shared::{EndStreamAction, drain_tls_close_notify, end_stream_decision},
40 update_readiness_after_read, update_readiness_after_write,
41 },
42 socket::{SocketHandler, SocketResult, stats::socket_rtt},
43 timer::TimeoutContainer,
44};
45
/// Protocol label + session descriptor used as a prefix on every
/// [`ConnectionH2`] log line. Matches the RUSTLS log-context convention:
/// `MUX-H2\tSession(...)\t >>>`. When colored output is enabled (via
/// [`ansi_palette`]) the label is wrapped in bold bright-white ANSI (uniform
/// across every protocol) and the session detail is rendered in light grey.
///
/// The leading `[{ulid} - - -]` bracket carries only the session ULID; the
/// three `-` placeholders occupy the request / cluster / backend slots that
/// [`log_context_stream!`] fills in when a stream's HTTP context is in scope.
///
/// Fields included in the session block (chosen to surface the most common
/// H2 troubleshooting axes — flow stall, leaked stream, draining state,
/// peer-side gap, reset-flood exposure):
/// - `peer` — peer address (or `None` if the socket is gone)
/// - `position` — `Server` / `Client(...)` orientation
/// - `state` — current [`H2State`]
/// - `streams` — number of in-flight streams on this connection
/// - `last_peer_id` — `highest_peer_stream_id` (gap to the peer's view)
/// - `window` — connection-level send window (RFC 9113 §6.9)
/// - `draining` — set after the first GOAWAY of a graceful shutdown
/// - `total_rst_streams_emitted_lifetime` — MadeYouReset counter (CVE-2025-8671)
/// - `total_rst_received_lifetime` — Rapid Reset counter (CVE-2023-44487)
/// - `readiness` — connection-level mio readiness snapshot
///
/// `$self` is expanded once per field below, so pass a plain place expression
/// (typically `self`) rather than an expression with side effects.
///
/// Computed lazily on each callsite — the helper only materialises when the
/// log level is enabled, so uncolored hot paths keep a single thread-local
/// read (the colored check) and one `format!` allocation.
macro_rules! log_context {
    ($self:expr) => {{
        // Five palette entries, interpolated around the label, the field
        // names and the field values in the format string below.
        let (open, reset, grey, gray, white) = ansi_palette();
        format!(
            "[{ulid} - - -]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = open,
            reset = reset,
            grey = grey,
            gray = gray,
            white = white,
            ulid = $self.session_ulid,
            // `.ok()` turns a failed `peer_addr()` lookup into `None`, which
            // the `{peer:?}` slot prints as-is.
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
        )
    }};
}
93
/// Per-stream variant of [`log_context!`] used when a [`Stream`]'s
/// [`HttpContext`](crate::protocol::kawa_h1::editor::HttpContext) is in
/// scope. Populates the `request_id`, `cluster_id` and `backend_id` slots of
/// the bracket so the log line can be filtered by the specific H2 stream it
/// belongs to.
///
/// Arguments:
/// - `$self` — the connection; the same fields as in [`log_context!`] are
///   read from it, each through a separate expansion of the expression.
/// - `$http_context` — value exposing `id`, `cluster_id` and `backend_id`;
///   a missing cluster or backend id is rendered as `-`.
///
/// Everything after the bracket is identical to the [`log_context!`] output.
// NOTE(review): no call site is visible in this part of the file; the `allow`
// presumably silences the unused-macro warning when none exists — confirm.
#[allow(unused_macros)]
macro_rules! log_context_stream {
    ($self:expr, $http_context:expr) => {{
        // Five palette entries, interpolated around the label, the field
        // names and the field values in the format string below.
        let (open, reset, grey, gray, white) = ansi_palette();
        format!(
            "[{ulid} {req} {cluster} {backend}]\t{open}MUX-H2{reset}\t{grey}Session{reset}({gray}peer{reset}={white}{peer:?}{reset}, {gray}position{reset}={white}{position:?}{reset}, {gray}state{reset}={white}{state:?}{reset}, {gray}streams{reset}={white}{streams}{reset}, {gray}last_peer_id{reset}={white}{last_peer_id}{reset}, {gray}window{reset}={white}{window}{reset}, {gray}draining{reset}={white}{draining}{reset}, {gray}total_rst_streams_emitted_lifetime{reset}={white}{total_rst_streams_emitted_lifetime}{reset}, {gray}total_rst_received_lifetime{reset}={white}{total_rst_received_lifetime}{reset}, {gray}readiness{reset}={white}{readiness}{reset})\t >>>",
            open = open,
            reset = reset,
            grey = grey,
            gray = gray,
            white = white,
            ulid = $self.session_ulid,
            req = $http_context.id,
            // Absent cluster / backend ids fall back to the `-` placeholder.
            cluster = $http_context.cluster_id.as_deref().unwrap_or("-"),
            backend = $http_context.backend_id.as_deref().unwrap_or("-"),
            // `.ok()` turns a failed `peer_addr()` lookup into `None`, which
            // the `{peer:?}` slot prints as-is.
            peer = $self.socket.socket_ref().peer_addr().ok(),
            position = $self.position,
            state = $self.state,
            streams = $self.streams.len(),
            last_peer_id = $self.highest_peer_stream_id,
            window = $self.flow_control.window,
            draining = $self.drain.draining,
            total_rst_streams_emitted_lifetime = $self.flood_detector.total_rst_streams_emitted_lifetime,
            total_rst_received_lifetime = $self.flood_detector.total_rst_received_lifetime,
            readiness = $self.readiness,
        )
    }};
}
127
/// Session-less log prefix: just the `MUX-H2` label followed by `\t >>>`.
///
/// Intended for free functions, `H2ConnectionConfig` validation and any other
/// site that has no `ConnectionH2` at hand. Only the first two entries
/// returned by `ansi_palette()` are used — they wrap the label, so the prefix
/// follows the colored flag the same way connection-level log lines do.
macro_rules! log_module_context {
    () => {{
        let (label_on, label_off, ..) = ansi_palette();
        format!(
            "{open}MUX-H2{reset}\t >>>",
            open = label_on,
            reset = label_off
        )
    }};
}
138
/// Per-frame flood gate, written as a single statement.
///
/// Expands to: ask `flood_detector.check_flood()` whether a threshold was
/// crossed and, if so, make the *enclosing function* return the value of
/// `handle_flood_violation(violation)`. When nothing tripped, execution simply
/// continues after the macro.
///
/// This is pure dispatch: the thresholds and counters live in
/// `H2FloodDetector::check_flood` and `ConnectionH2::handle_flood_violation`,
/// neither of which the macro alters. Using it at every per-frame counter
/// bump keeps the call sites uniform, so a flood check that forgets to
/// `return` cannot slip in.
macro_rules! check_flood_or_return {
    ($conn:expr) => {
        if let Some(tripped) = $conn.flood_detector.check_flood() {
            return $conn.handle_flood_violation(tripped);
        }
    };
}
152
/// Result of flushing one stream's queued bytes inside `write_streams`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FlushOutcome {
    /// Every queued byte made it to the socket.
    Drained,
    /// The socket blocked while bytes were still queued. The caller is
    /// responsible for resuming later (set expect_write, or return from
    /// write_streams).
    Stalled,
}
162
163// ── RFC 9113 §6.5.2 Settings Defaults ───────────────────────────────────────
164
165const DEFAULT_HEADER_TABLE_SIZE: u32 = 4096;
166const DEFAULT_MAX_CONCURRENT_STREAMS: u32 = 100;
167pub(super) const DEFAULT_INITIAL_WINDOW_SIZE: u32 = (1 << 16) - 1; // 65535
168const DEFAULT_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
169
170// RFC 9113 §6.5.2: SETTINGS_MAX_FRAME_SIZE valid range [2^14, 2^24)
171const MIN_MAX_FRAME_SIZE: u32 = 1 << 14; // 16384
172const MAX_MAX_FRAME_SIZE: u32 = 1 << 24; // 16777216 (exclusive upper bound)
173
174// RFC 9113 §6.9: maximum flow control window size (2^31 - 1)
175const FLOW_CONTROL_MAX_WINDOW: u32 = (1 << 31) - 1;
176// RFC 9113 §5.1.1: stream identifiers are 31-bit unsigned integers (2^31 - 1).
177const STREAM_ID_MAX: u32 = 0x7FFF_FFFF;
178
179/// Allocate the next locally-initiated stream identifier given the current
180/// `last_stream_id` watermark, returning `(issued_id, next_last_stream_id)`
181/// or `None` when the 31-bit space is exhausted.
182///
183/// RFC 9113 §5.1.1 reserves odd identifiers for clients and even identifiers
184/// for servers. Sōzu never server-pushes, so in practice this helper is
185/// called on the backend (client) side via [`ConnectionH2::new_stream_id`].
186/// The server branch is kept symmetrical so the behaviour is exercised by
187/// the unit tests and remains correct if push is ever enabled.
188///
189/// `last_stream_id` tracks the even "watermark" (2, 4, 6, ...). A client call
190/// issues `watermark - 1` (odd), a server call issues `watermark - 2` (even).
191/// The helper enforces two invariants:
192/// - the issued identifier never exceeds `STREAM_ID_MAX` (2³¹ - 1); and
193/// - the returned watermark is a valid starting point for the next call.
194///
195/// Exhaustion is reported with `None` to the caller, which must emit
196/// GOAWAY(NO_ERROR) and stop issuing new streams on this connection
197/// (see `start_stream` for the client-side drain path).
198pub(super) fn next_stream_id(
199 last_stream_id: StreamId,
200 is_client: bool,
201) -> Option<(StreamId, StreamId)> {
202 let next = last_stream_id.checked_add(2)?;
203 let issued = if is_client {
204 next.checked_sub(1)?
205 } else {
206 next.checked_sub(2)?
207 };
208 // RFC 9113 §5.1.1: stream identifiers are 31-bit. Reject any allocation
209 // whose issued value would exceed `STREAM_ID_MAX`; the watermark itself
210 // is allowed to sit at `STREAM_ID_MAX + 1` (the sentinel that fails the
211 // next call).
212 if issued > STREAM_ID_MAX {
213 return None;
214 }
215 Some((issued, next))
216}
217
218/// Enlarged connection-level receive window (1 MB).
219/// The RFC 9113 default is 65 535 bytes, which is too small for high-throughput
220/// proxying and causes excessive WINDOW_UPDATE round-trips. 1 MB matches the
221/// initial window used by HAProxy, the h2 crate, and other production proxies.
222const ENLARGED_CONNECTION_WINDOW: u32 = 1_048_576;
223
224/// H2 client connection preface size: 24-byte magic + 9-byte SETTINGS frame header
225pub(super) const CLIENT_PREFACE_SIZE: usize = 24 + parser::FRAME_HEADER_SIZE;
226
227// ── Flood Detection Thresholds (CVE mitigations) ────────────────────────────
228
229/// Default maximum RST_STREAM frames per window (CVE-2023-44487 Rapid Reset + CVE-2019-9514)
230const DEFAULT_MAX_RST_STREAM_PER_WINDOW: u32 = 100;
231/// Hard lifetime cap on total RST_STREAM frames received on a single
232/// connection (CVE-2023-44487 Rapid Reset).
233///
234/// The per-window counter half-decays, which allows a patient attacker to
235/// sustain ~50 RST/sec indefinitely — each one costs the backend a request
236/// that will be cancelled before any response work is produced. A lifetime
237/// counter that never decays puts an absolute ceiling on that amplification
238/// per connection. 10 000 is generous for legitimate traffic (months of
239/// occasional client-side cancellations) but rapidly trips on the ~30/sec
240/// abusive pace reported in the CVE-2023-44487 advisory (~5 minutes).
241pub(super) const DEFAULT_MAX_RST_STREAM_LIFETIME: u64 = 10_000;
242/// Hard lifetime cap on RST_STREAM frames received BEFORE the corresponding
243/// backend response has started. These are the cheap-for-client /
244/// expensive-for-us resets that characterise Rapid Reset: the client pays
245/// one RST frame, we pay a round-trip to the backend plus request parsing.
246/// A much lower ceiling kills the attack well before 10 000 lifetime total.
247pub(super) const DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME: u64 = 50;
248/// Absolute lifetime cap on **server-emitted** RST_STREAM frames on a single
249/// connection (CVE-2025-8671 — "MadeYouReset"). Distinct from
250/// [`DEFAULT_MAX_RST_STREAM_LIFETIME`] which caps *received* RSTs
251/// (CVE-2023-44487 Rapid Reset).
252///
253/// MadeYouReset has the server talk itself into flooding: the attacker sends
254/// legitimate-looking frames that force the server to emit RST_STREAM (content
255/// -length mismatch, header parse error, rejected priority, zero-increment
256/// `WINDOW_UPDATE` on an open stream, …). Each forced RST costs the server a
257/// header-decode, kawa buffer setup and frame serialisation; uncapped, it
258/// becomes the same class of DoS as Rapid Reset but with a flipped emission
259/// direction.
260///
261/// 500 is conservative: legitimate traffic very rarely triggers a
262/// server-initiated RST (aside from graceful `NoError` cancels which are not
263/// counted), so crossing 500 on a single connection is a strong abuse signal.
264pub(super) const DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME: u64 = 500;
265/// Default maximum PING frames per window (CVE-2019-9512 Ping Flood)
266const DEFAULT_MAX_PING_PER_WINDOW: u32 = 100;
267/// Absolute lifetime cap on PING frames received on a single connection.
268/// Mirrors DEFAULT_MAX_RST_STREAM_LIFETIME — generous for legitimate
269/// keep-alives but trips on sustained low-rate abuse (CVE-2019-9512).
270const DEFAULT_MAX_PING_LIFETIME: u32 = 10_000;
271/// Default maximum SETTINGS frames per window (CVE-2019-9515 Settings Flood)
272const DEFAULT_MAX_SETTINGS_PER_WINDOW: u32 = 50;
273/// Absolute lifetime cap on SETTINGS frames received on a single connection.
274/// Mirrors DEFAULT_MAX_RST_STREAM_LIFETIME — generous for legitimate
275/// renegotiations but trips on sustained low-rate abuse (CVE-2019-9515).
276const DEFAULT_MAX_SETTINGS_LIFETIME: u32 = 10_000;
277/// Default maximum empty DATA frames per window (CVE-2019-9518 Empty Frames)
278const DEFAULT_MAX_EMPTY_DATA_PER_WINDOW: u32 = 100;
279/// Default maximum connection-level (stream 0) WINDOW_UPDATE frames per
280/// sliding window. Non-zero stream-0 WINDOW_UPDATE frames are otherwise
281/// uncounted by the generic glitch detector — a peer could burn proxy CPU by
282/// sending millions of legal-looking stream-0 WINDOW_UPDATEs. Value mirrors
283/// [`DEFAULT_MAX_EMPTY_DATA_PER_WINDOW`] / [`DEFAULT_MAX_PING_PER_WINDOW`] —
284/// legitimate proxies only need a handful per second.
285const DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW: u32 = 100;
286/// Default maximum CONTINUATION frames per header block (CVE-2024-27316)
287const DEFAULT_MAX_CONTINUATION_FRAMES: u32 = 20;
288/// Maximum accumulated header block size across CONTINUATION frames (64KB)
289pub(super) const MAX_HEADER_LIST_SIZE: usize = 65536;
290/// Default maximum HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE)
291/// accepted from the peer. 64 KB is well above the RFC default of 4 KB
292/// while preventing a malicious peer from advertising up to 4 GB.
293const DEFAULT_MAX_HEADER_TABLE_SIZE: u32 = 65536;
294/// Duration of the sliding window for rate-based flood counters
295const FLOOD_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(1);
296/// Default maximum general anomaly count before triggering ENHANCE_YOUR_CALM
297const DEFAULT_MAX_GLITCH_COUNT: u32 = 100;
298
299/// RFC 9113 §5.1.2: threshold of `REFUSED_STREAM` emissions per
300/// [`BACKPRESSURE_WINDOW_DURATION`] that triggers back-pressure — at this
301/// point we halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` so the
302/// peer throttles its request rate instead of paying the RST round-trip for
303/// every new stream.
304const BACKPRESSURE_REFUSAL_THRESHOLD: u32 = 50;
305/// Sliding window used to detect refusal bursts for SETTINGS back-pressure.
306const BACKPRESSURE_WINDOW_DURATION: std::time::Duration = std::time::Duration::from_secs(60);
307
/// Tunable thresholds for H2 flood detection.
///
/// Every field has a safe default equal to the corresponding compile-time
/// constant. When the listener configuration leaves a value as `None`, that
/// default is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2FloodConfig {
    /// RST_STREAM frames tolerated per one-second window
    /// (CVE-2023-44487, CVE-2019-9514).
    pub max_rst_stream_per_window: u32,
    /// PING frames tolerated per one-second window (CVE-2019-9512).
    pub max_ping_per_window: u32,
    /// SETTINGS frames tolerated per one-second window (CVE-2019-9515).
    pub max_settings_per_window: u32,
    /// Empty DATA frames tolerated per one-second window (CVE-2019-9518).
    pub max_empty_data_per_window: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames tolerated per sliding
    /// window. Each such frame is individually legal, so the generic glitch
    /// counter never trips on them; this cap bounds the CPU a peer can burn
    /// by sending millions of them to be parsed and applied to the flow
    /// window.
    pub max_window_update_stream0_per_window: u32,
    /// CONTINUATION frames tolerated per header block (CVE-2024-27316).
    pub max_continuation_frames: u32,
    /// Protocol anomalies accumulated before answering ENHANCE_YOUR_CALM.
    pub max_glitch_count: u32,
    /// Lifetime ceiling on RST_STREAM frames received on one connection
    /// (CVE-2023-44487). It never decays, providing the hard bound the
    /// per-window counter cannot.
    pub max_rst_stream_lifetime: u64,
    /// Lifetime ceiling on "abusive" RST_STREAM frames — those received
    /// before the response started, the Rapid Reset signature
    /// (CVE-2023-44487).
    pub max_rst_stream_abusive_lifetime: u64,
    /// Lifetime ceiling on **server-emitted** RST_STREAM frames for this
    /// connection (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
    /// are counted; graceful cancels are exempt.
    pub max_rst_stream_emitted_lifetime: u64,
    /// Largest accumulated HPACK-decoded header list accepted per request
    /// (SETTINGS_MAX_HEADER_LIST_SIZE, RFC 9113 §6.5.2).
    pub max_header_list_size: u32,
    /// Largest HPACK dynamic table size (SETTINGS_HEADER_TABLE_SIZE) accepted
    /// from the peer. Caps what the peer may advertise in SETTINGS frames so
    /// HPACK encoder memory cannot grow without bound.
    pub max_header_table_size: u32,
}
351
352impl Default for H2FloodConfig {
353 fn default() -> Self {
354 Self {
355 max_rst_stream_per_window: DEFAULT_MAX_RST_STREAM_PER_WINDOW,
356 max_ping_per_window: DEFAULT_MAX_PING_PER_WINDOW,
357 max_settings_per_window: DEFAULT_MAX_SETTINGS_PER_WINDOW,
358 max_empty_data_per_window: DEFAULT_MAX_EMPTY_DATA_PER_WINDOW,
359 max_window_update_stream0_per_window: DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW,
360 max_continuation_frames: DEFAULT_MAX_CONTINUATION_FRAMES,
361 max_glitch_count: DEFAULT_MAX_GLITCH_COUNT,
362 max_rst_stream_lifetime: DEFAULT_MAX_RST_STREAM_LIFETIME,
363 max_rst_stream_abusive_lifetime: DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME,
364 max_rst_stream_emitted_lifetime: DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME,
365 max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
366 max_header_table_size: DEFAULT_MAX_HEADER_TABLE_SIZE,
367 }
368 }
369}
370
371impl H2FloodConfig {
372 /// Create a validated config, clamping all thresholds to at least 1.
373 /// Zero thresholds would cause immediate flood detection on any frame.
374 #[allow(clippy::too_many_arguments)]
375 pub fn new(
376 max_rst_stream_per_window: u32,
377 max_ping_per_window: u32,
378 max_settings_per_window: u32,
379 max_empty_data_per_window: u32,
380 max_window_update_stream0_per_window: u32,
381 max_continuation_frames: u32,
382 max_glitch_count: u32,
383 max_rst_stream_lifetime: u64,
384 max_rst_stream_abusive_lifetime: u64,
385 max_rst_stream_emitted_lifetime: u64,
386 max_header_list_size: u32,
387 max_header_table_size: u32,
388 ) -> Self {
389 Self {
390 max_rst_stream_per_window: max_rst_stream_per_window.max(1),
391 max_ping_per_window: max_ping_per_window.max(1),
392 max_settings_per_window: max_settings_per_window.max(1),
393 max_empty_data_per_window: max_empty_data_per_window.max(1),
394 max_window_update_stream0_per_window: max_window_update_stream0_per_window.max(1),
395 max_continuation_frames: max_continuation_frames.max(1),
396 max_glitch_count: max_glitch_count.max(1),
397 max_rst_stream_lifetime: max_rst_stream_lifetime.max(1),
398 max_rst_stream_abusive_lifetime: max_rst_stream_abusive_lifetime.max(1),
399 max_rst_stream_emitted_lifetime: max_rst_stream_emitted_lifetime.max(1),
400 max_header_list_size: max_header_list_size.max(1),
401 max_header_table_size: max_header_table_size.max(1),
402 }
403 }
404}
405
/// Default shrink ratio for the stream `Vec`: it is shrunk once the total
/// number of slots exceeds `active * ratio`.
const DEFAULT_STREAM_SHRINK_RATIO: u32 = 2;
408
/// Tunable parameters of an H2 connection.
///
/// Every field has a safe default; values missing from the listener
/// configuration fall back to the compile-time defaults.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct H2ConnectionConfig {
    /// Size in bytes of the connection-level receive window
    /// (RFC 9113 §6.9.2).
    pub initial_connection_window: u32,
    /// Value used for SETTINGS_MAX_CONCURRENT_STREAMS.
    pub max_concurrent_streams: u32,
    /// Ratio above which recycled stream slots are shrunk away.
    pub stream_shrink_ratio: u32,
}
422
423impl Default for H2ConnectionConfig {
424 fn default() -> Self {
425 Self {
426 initial_connection_window: ENLARGED_CONNECTION_WINDOW,
427 max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
428 stream_shrink_ratio: DEFAULT_STREAM_SHRINK_RATIO,
429 }
430 }
431}
432
433impl H2ConnectionConfig {
434 /// Create a validated config, clamping to safe bounds.
435 ///
436 /// - `initial_connection_window`: clamped to \[65535, 2^31-1\] per RFC 9113 §6.9
437 /// - `max_concurrent_streams`: minimum 1
438 /// - `stream_shrink_ratio`: minimum 2 (1 would defeat slot recycling)
439 pub fn new(
440 initial_connection_window: u32,
441 max_concurrent_streams: u32,
442 stream_shrink_ratio: u32,
443 ) -> Self {
444 let clamped_window =
445 initial_connection_window.clamp(DEFAULT_INITIAL_WINDOW_SIZE, FLOW_CONTROL_MAX_WINDOW);
446 if clamped_window != initial_connection_window {
447 warn!(
448 "{} h2_initial_connection_window {} clamped to [{}, {}]",
449 log_module_context!(),
450 initial_connection_window,
451 DEFAULT_INITIAL_WINDOW_SIZE,
452 FLOW_CONTROL_MAX_WINDOW
453 );
454 }
455 const MAX_SAFE_CONCURRENT_STREAMS: u32 = 10_000;
456 let clamped_streams = max_concurrent_streams.clamp(1, MAX_SAFE_CONCURRENT_STREAMS);
457 if max_concurrent_streams > MAX_SAFE_CONCURRENT_STREAMS {
458 error!(
459 "{} h2_max_concurrent_streams={} exceeds safe limit, clamped to {}",
460 log_module_context!(),
461 max_concurrent_streams,
462 MAX_SAFE_CONCURRENT_STREAMS
463 );
464 }
465 if clamped_streams != max_concurrent_streams
466 && max_concurrent_streams <= MAX_SAFE_CONCURRENT_STREAMS
467 {
468 warn!(
469 "{} h2_max_concurrent_streams {} clamped to minimum 1",
470 log_module_context!(),
471 max_concurrent_streams
472 );
473 }
474 let clamped_ratio = stream_shrink_ratio.max(2);
475 if clamped_ratio != stream_shrink_ratio {
476 warn!(
477 "{} h2_stream_shrink_ratio {} clamped to minimum 2",
478 log_module_context!(),
479 stream_shrink_ratio
480 );
481 }
482 Self {
483 initial_connection_window: clamped_window,
484 max_concurrent_streams: clamped_streams,
485 stream_shrink_ratio: clamped_ratio,
486 }
487 }
488
489 /// Create from optional config values, falling back to compile-time defaults.
490 /// Combines unwrap-or-default with validation clamping.
491 pub fn from_optional(
492 window: Option<u32>,
493 max_streams: Option<u32>,
494 shrink_ratio: Option<u32>,
495 ) -> Self {
496 let defaults = Self::default();
497 Self::new(
498 window.unwrap_or(defaults.initial_connection_window),
499 max_streams.unwrap_or(defaults.max_concurrent_streams),
500 shrink_ratio.unwrap_or(defaults.stream_shrink_ratio),
501 )
502 }
503}
504
/// Default capacity for pending WINDOW_UPDATE entries (test builds only).
/// At runtime the per-connection cap is derived from
/// `connection_config.max_concurrent_streams` instead.
#[cfg(test)]
const DEFAULT_MAX_PENDING_WINDOW_UPDATES: usize = 4 * (DEFAULT_MAX_CONCURRENT_STREAMS as usize) + 1;

/// Number of queued RST_STREAM frames beyond which the connection answers
/// with GOAWAY. A peer that forces excessive RST_STREAM queueing (e.g. by
/// opening streams faster than MAX_CONCURRENT_STREAMS allows) would otherwise
/// grow memory without bound; hitting this cap triggers an ENHANCE_YOUR_CALM
/// connection error.
const MAX_PENDING_RST_STREAMS: usize = 200;

/// RFC 9113 §6.5.3: how long to wait for a SETTINGS ACK before sending GOAWAY
/// with the SETTINGS_TIMEOUT error code (5 seconds).
const SETTINGS_ACK_TIMEOUT: Duration = Duration::from_secs(5);
519
520#[inline(always)]
521fn error_nom_to_h2(error: nom::Err<parser::ParserError>) -> H2Error {
522 match error {
523 nom::Err::Error(parser::ParserError {
524 kind: parser::ParserErrorKind::H2(e),
525 ..
526 }) => e,
527 nom::Err::Failure(parser::ParserError {
528 kind: parser::ParserErrorKind::H2(e),
529 ..
530 }) => e,
531 _ => H2Error::ProtocolError,
532 }
533}
534
535/// Distribute connection-level byte overhead proportionally to a single stream.
536///
537/// Overhead is distributed in proportion to the bytes this stream transferred
538/// relative to the total across all active streams. A stream that transferred
539/// 60% of total bytes gets 60% of the overhead.
540///
541/// `stream_bytes` and `total_bytes` are `(bytes_in, bytes_out)` tuples.
542/// Falls back to even distribution (1/active_streams) when no stream has
543/// transferred any bytes yet (total is zero).
544///
545/// Extracted as a free function to avoid borrow conflicts when `self` fields
546/// (e.g. `encoder`) are borrowed by the converter while we need to update
547/// per-stream metrics and connection overhead counters.
548fn distribute_overhead(
549 metrics: &mut SessionMetrics,
550 overhead_bin: &mut usize,
551 overhead_bout: &mut usize,
552 stream_bytes: (usize, usize),
553 total_bytes: (usize, usize),
554 active_streams: usize,
555 is_last_stream: bool,
556) {
557 let share_in = if is_last_stream {
558 // Last stream gets all remaining overhead to avoid losing remainder bytes
559 // from integer division across earlier streams.
560 *overhead_bin
561 } else if total_bytes.0 > 0 {
562 // Clamp to remaining overhead — integer division rounding across multiple
563 // streams can cause accumulated shares to exceed the total.
564 (*overhead_bin * stream_bytes.0 / total_bytes.0).min(*overhead_bin)
565 } else {
566 // No stream has transferred any inbound bytes — fall back to even split.
567 *overhead_bin / active_streams.max(1)
568 };
569 let share_out = if is_last_stream {
570 *overhead_bout
571 } else if total_bytes.1 > 0 {
572 (*overhead_bout * stream_bytes.1 / total_bytes.1).min(*overhead_bout)
573 } else {
574 // No stream has transferred any outbound bytes — fall back to even split.
575 *overhead_bout / active_streams.max(1)
576 };
577 metrics.bin += share_in;
578 metrics.bout += share_out;
579 *overhead_bin -= share_in;
580 *overhead_bout -= share_out;
581}
582
583/// LIFECYCLE §9 invariant 16 probe: returns `true` if any open stream still
584/// has outbound kawa bytes queued (`back.out` non-empty or `back.blocks`
585/// non-drained).
586///
587/// Used by `finalize_write` to preserve `Ready::WRITABLE` across a voluntary
588/// scheduler yield, and by `has_pending_write_full` to block shutdown-drain
589/// while bytes are still owed to the frontend.
590///
591/// `.get()` rather than direct indexing: an unknown `GlobalStreamId` is
592/// treated as "no pending bytes" rather than panicking — defence-in-depth
593/// against a stream-removal race during shutdown.
594fn any_stream_has_pending_back(
595 streams: &HashMap<StreamId, GlobalStreamId>,
596 context_streams: &[Stream],
597) -> bool {
598 any_stream_id_matches(streams, |gid| {
599 context_streams
600 .get(gid)
601 .is_some_and(|s| !s.back.out.is_empty() || !s.back.blocks.is_empty())
602 })
603}
604
605/// Iteration core of [`any_stream_has_pending_back`], split out so the
606/// invariant-16 dispatch is unit-testable without a full [`Stream`] fixture
607/// (the existing test module only covers `H2FloodDetector`).
608fn any_stream_id_matches<F>(streams: &HashMap<StreamId, GlobalStreamId>, mut probe: F) -> bool
609where
610 F: FnMut(GlobalStreamId) -> bool,
611{
612 streams.values().any(|gid| probe(*gid))
613}
614
615/// Core of [`ConnectionH2::enqueue_rst`], extracted so the RST-queueing
616/// semantics (dedupe, queued-cap counter bump, invariant-15 readiness rearm)
617/// can be unit-tested without building a full `ConnectionH2<Front>` fixture.
618///
619/// Invariants enforced:
620/// - **Dedupe** via `rst_sent`: at most one queued RST per wire stream id.
621/// `HashSet::insert` returns `false` when the id is already present; we
622/// short-circuit on that branch to keep `pending_rst_streams`,
623/// `total_rst_streams_queued` and the wire counts consistent.
624/// - **MadeYouReset queued cap** (`MAX_PENDING_RST_STREAMS`): each freshly
625/// queued RST bumps `total_rst_streams_queued`, which
626/// `flush_pending_control_frames` polices to escalate to
627/// `GOAWAY(ENHANCE_YOUR_CALM)` when exceeded.
628/// - **Invariant 15** (edge-triggered epoll): pair `Ready::WRITABLE` interest
629/// with the event bit so `writable()` is scheduled on the next tick.
630///
631/// Returns `true` when the RST was freshly queued, `false` when the
632/// stream was already in `rst_sent` (the caller asked to RST the same
633/// stream twice — a benign re-entrant idempotency, NOT a new wire
634/// emission). The boolean lets [`ConnectionH2::enqueue_rst`] account
635/// the RST only on the freshly-queued path so duplicate calls do not
636/// inflate the per-error counter or trip the MadeYouReset flood cap
637/// for frames that never reach the wire.
638fn enqueue_rst_into(
639 pending: &mut Vec<(StreamId, H2Error)>,
640 total: &mut usize,
641 rst_sent: &mut HashSet<StreamId>,
642 readiness: &mut Readiness,
643 wire_stream_id: StreamId,
644 error: H2Error,
645) -> bool {
646 if !rst_sent.insert(wire_stream_id) {
647 return false;
648 }
649 pending.push((wire_stream_id, error));
650 *total += 1;
651 readiness.arm_writable();
652 true
653}
654
/// Detail of a flood-threshold violation returned by
/// [`H2FloodDetector::check_flood`], [`H2FloodDetector::record_rst_lifetime`]
/// and [`H2FloodDetector::record_rst_emitted`].
///
/// Carrying `(reason, count, threshold)` lets the caller emit a session-scoped
/// log line with full context — the detector itself is connection-agnostic and
/// never logs.
#[derive(Debug, Clone, PartialEq)]
pub struct H2FloodViolation {
    /// HTTP/2 error code to emit on the GOAWAY. The three detector methods
    /// listed above always set this to `H2Error::EnhanceYourCalm`.
    pub error: H2Error,
    /// Human-readable name of the counter that tripped (e.g. `"RST_STREAM"`).
    pub reason: &'static str,
    /// Statsd metric key emitted by [`ConnectionH2::handle_flood_violation`].
    /// Carried alongside `reason` so a single field maps to both the log line
    /// and the dashboard counter — adding a new violation kind requires
    /// choosing both at the construction site, preventing drift.
    pub metric_key: &'static str,
    /// Observed counter value at the moment of detection. Stored as `u64` so
    /// it can hold both the `u32` window counters (widened by `check_flood`)
    /// and the `u64` lifetime RST_STREAM counters.
    pub count: u64,
    /// Configured ceiling that was crossed (violations fire on
    /// `count > threshold`, so `count` is strictly greater).
    pub threshold: u64,
}
677
/// Tracks per-connection frame rates to detect and mitigate H2 flood attacks.
///
/// Monitors RST_STREAM (CVE-2023-44487), PING (CVE-2019-9512), SETTINGS (CVE-2019-9515),
/// empty DATA (CVE-2019-9518), and CONTINUATION (CVE-2024-27316) flood patterns.
/// When any counter exceeds its threshold, `check_flood()` returns the violation
/// detail so callers can log with connection context before sending GOAWAY.
/// The lifetime RST_STREAM counters are policed separately by
/// `record_rst_lifetime()` (received) and `record_rst_emitted()` (sent).
///
/// Three families of counters live here:
/// - *windowed* counters, halved by `maybe_reset_window()` once the flood
///   window has elapsed;
/// - *lifetime* counters, which never decay;
/// - *per-header-block* counters, cleared by `reset_continuation()`.
///
/// Thresholds are configurable via [`H2FloodConfig`], with safe defaults matching
/// the original compile-time constants.
#[derive(Debug)]
pub struct H2FloodDetector {
    /// RST_STREAM frames received in current window (CVE-2023-44487 + CVE-2019-9514)
    pub(super) rst_stream_count: u32,
    /// Lifetime RST_STREAM frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing a sustained ~50 RST/sec burst
    /// from running forever.
    pub(super) total_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames received that targeted a stream whose
    /// backend response had not yet started. These are the "Rapid Reset"
    /// signature — cheap for the attacker, expensive for the proxy — and
    /// trip on a much lower ceiling than the generic lifetime counter.
    pub(super) total_abusive_rst_received_lifetime: u64,
    /// Lifetime RST_STREAM frames **emitted by the server** on this
    /// connection (CVE-2025-8671 "MadeYouReset" mitigation). Incremented
    /// by `record_rst_emitted()`, which is meant to be called from
    /// [`ConnectionH2::reset_stream`] whenever a non-`NoError` reset
    /// is triggered by an attacker-crafted frame (content-length mismatch,
    /// header parse error, priority rejection, zero-increment WINDOW_UPDATE
    /// on an open stream). Never decays — provides an absolute ceiling that
    /// short-circuits patient-attacker patterns that stay under any windowed
    /// counter.
    pub(super) total_rst_streams_emitted_lifetime: u64,
    /// PING frames received in current window (CVE-2019-9512)
    pub(super) ping_count: u32,
    /// Lifetime PING frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate PING abuse.
    /// `check_flood()` compares it against the `DEFAULT_MAX_PING_LIFETIME`
    /// constant rather than a [`H2FloodConfig`] field.
    pub(super) total_ping_received_lifetime: u32,
    /// SETTINGS frames received in current window (CVE-2019-9515)
    pub(super) settings_count: u32,
    /// Lifetime SETTINGS frames received on this connection.
    ///
    /// Never decays — provides an absolute ceiling that the half-decaying
    /// per-window counter cannot, preventing sustained low-rate SETTINGS abuse.
    /// `check_flood()` compares it against the `DEFAULT_MAX_SETTINGS_LIFETIME`
    /// constant rather than a [`H2FloodConfig`] field.
    pub(super) total_settings_received_lifetime: u32,
    /// Empty DATA frames received in current window (CVE-2019-9518)
    pub(super) empty_data_count: u32,
    /// Connection-level (stream 0) WINDOW_UPDATE frames received in current
    /// sliding window. Half-decays with `maybe_reset_window()` like other
    /// rate counters. Increments on non-zero stream-0 WINDOW_UPDATEs only —
    /// zero-increment frames short-circuit into GOAWAY(PROTOCOL_ERROR) per
    /// RFC 9113 §6.9 before reaching this counter.
    pub(super) window_update_stream0_count: u32,
    /// CONTINUATION frames received for current header block (CVE-2024-27316).
    /// Not affected by the window decay; cleared by `reset_continuation()`.
    pub(super) continuation_count: u32,
    /// Total accumulated header block size across CONTINUATION frames.
    /// Compared against `config.max_header_list_size` in `check_flood()` and
    /// cleared by `reset_continuation()`.
    pub(super) accumulated_header_size: u32,
    /// General anomaly counter. Half-decays with the window and is compared
    /// against `config.max_glitch_count`.
    pub(super) glitch_count: u32,
    /// Window start for rate-based counters. Re-stamped each time
    /// `maybe_reset_window()` halves the windowed counters.
    pub(super) window_start: Instant,
    /// Configurable thresholds for flood detection
    pub(super) config: H2FloodConfig,
}
744
745impl Default for H2FloodDetector {
746 fn default() -> Self {
747 Self::new(H2FloodConfig::default())
748 }
749}
750
751impl H2FloodDetector {
752 pub fn new(config: H2FloodConfig) -> Self {
753 Self {
754 rst_stream_count: 0,
755 total_rst_received_lifetime: 0,
756 total_abusive_rst_received_lifetime: 0,
757 total_rst_streams_emitted_lifetime: 0,
758 ping_count: 0,
759 total_ping_received_lifetime: 0,
760 settings_count: 0,
761 total_settings_received_lifetime: 0,
762 empty_data_count: 0,
763 window_update_stream0_count: 0,
764 continuation_count: 0,
765 accumulated_header_size: 0,
766 glitch_count: 0,
767 window_start: Instant::now(),
768 config,
769 }
770 }
771
772 /// Increment the lifetime RST_STREAM counters and return a
773 /// [`H2FloodViolation`] if either the global or the abusive
774 /// (pre-response-start) lifetime cap has been exceeded.
775 ///
776 /// `response_started` indicates whether the backend response had already
777 /// begun when the RST arrived; `false` is the cheap-for-client /
778 /// expensive-for-us Rapid Reset signature (CVE-2023-44487).
779 pub fn record_rst_lifetime(&mut self, response_started: bool) -> Option<H2FloodViolation> {
780 self.total_rst_received_lifetime = self.total_rst_received_lifetime.saturating_add(1);
781 if !response_started {
782 self.total_abusive_rst_received_lifetime =
783 self.total_abusive_rst_received_lifetime.saturating_add(1);
784 }
785 if self.total_rst_received_lifetime > self.config.max_rst_stream_lifetime {
786 return Some(H2FloodViolation {
787 error: H2Error::EnhanceYourCalm,
788 reason: "Rapid Reset: lifetime RST_STREAM",
789 metric_key: "h2.flood.violation.rst_stream_lifetime",
790 count: self.total_rst_received_lifetime,
791 threshold: self.config.max_rst_stream_lifetime,
792 });
793 }
794 if self.total_abusive_rst_received_lifetime > self.config.max_rst_stream_abusive_lifetime {
795 return Some(H2FloodViolation {
796 error: H2Error::EnhanceYourCalm,
797 reason: "Rapid Reset: lifetime pre-response RST_STREAM",
798 metric_key: "h2.flood.violation.rst_stream_pre_response_lifetime",
799 count: self.total_abusive_rst_received_lifetime,
800 threshold: self.config.max_rst_stream_abusive_lifetime,
801 });
802 }
803 None
804 }
805
806 /// Increment the lifetime **server-emitted** RST_STREAM counter and
807 /// return a [`H2FloodViolation`] once the configured ceiling is exceeded.
808 ///
809 /// Call sites are the error paths inside [`ConnectionH2::reset_stream`]
810 /// where an attacker-crafted frame coerces the server into emitting a
811 /// RST_STREAM (CVE-2025-8671 "MadeYouReset"). Only non-`NoError` resets
812 /// are reported — callers must exclude graceful cancels.
813 pub fn record_rst_emitted(&mut self) -> Option<H2FloodViolation> {
814 self.total_rst_streams_emitted_lifetime =
815 self.total_rst_streams_emitted_lifetime.saturating_add(1);
816 if self.total_rst_streams_emitted_lifetime > self.config.max_rst_stream_emitted_lifetime {
817 return Some(H2FloodViolation {
818 error: H2Error::EnhanceYourCalm,
819 reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
820 metric_key: "h2.flood.violation.rst_stream_emitted_lifetime",
821 count: self.total_rst_streams_emitted_lifetime,
822 threshold: self.config.max_rst_stream_emitted_lifetime,
823 });
824 }
825 None
826 }
827
828 /// Half-decay rate-based counters if the current window has expired.
829 /// Uses half-window decay instead of full reset to catch burst-then-wait attacks.
830 fn maybe_reset_window(&mut self) {
831 if self.window_start.elapsed() >= FLOOD_WINDOW_DURATION {
832 self.rst_stream_count /= 2;
833 self.ping_count /= 2;
834 self.settings_count /= 2;
835 self.empty_data_count /= 2;
836 self.window_update_stream0_count /= 2;
837 self.glitch_count /= 2;
838 self.window_start = Instant::now();
839 }
840 }
841
842 /// Check all flood counters. Returns a [`H2FloodViolation`] when a threshold
843 /// is exceeded; the caller is responsible for logging with session context
844 /// and escalating to GOAWAY.
845 pub fn check_flood(&mut self) -> Option<H2FloodViolation> {
846 self.maybe_reset_window();
847
848 fn flag(
849 reason: &'static str,
850 metric_key: &'static str,
851 count: u32,
852 threshold: u32,
853 ) -> Option<H2FloodViolation> {
854 if count > threshold {
855 Some(H2FloodViolation {
856 error: H2Error::EnhanceYourCalm,
857 reason,
858 metric_key,
859 count: count as u64,
860 threshold: threshold as u64,
861 })
862 } else {
863 None
864 }
865 }
866
867 flag(
868 "RST_STREAM",
869 "h2.flood.violation.rst_stream_window",
870 self.rst_stream_count,
871 self.config.max_rst_stream_per_window,
872 )
873 .or_else(|| {
874 flag(
875 "PING",
876 "h2.flood.violation.ping_window",
877 self.ping_count,
878 self.config.max_ping_per_window,
879 )
880 })
881 .or_else(|| {
882 flag(
883 "PING lifetime",
884 "h2.flood.violation.ping_lifetime",
885 self.total_ping_received_lifetime,
886 DEFAULT_MAX_PING_LIFETIME,
887 )
888 })
889 .or_else(|| {
890 flag(
891 "SETTINGS",
892 "h2.flood.violation.settings_window",
893 self.settings_count,
894 self.config.max_settings_per_window,
895 )
896 })
897 .or_else(|| {
898 flag(
899 "SETTINGS lifetime",
900 "h2.flood.violation.settings_lifetime",
901 self.total_settings_received_lifetime,
902 DEFAULT_MAX_SETTINGS_LIFETIME,
903 )
904 })
905 .or_else(|| {
906 flag(
907 "empty DATA",
908 "h2.flood.violation.empty_data_window",
909 self.empty_data_count,
910 self.config.max_empty_data_per_window,
911 )
912 })
913 .or_else(|| {
914 flag(
915 "CONTINUATION",
916 "h2.flood.violation.continuation_per_block",
917 self.continuation_count,
918 self.config.max_continuation_frames,
919 )
920 })
921 .or_else(|| {
922 flag(
923 "WINDOW_UPDATE stream 0",
924 "h2.flood.violation.window_update_stream0_window",
925 self.window_update_stream0_count,
926 self.config.max_window_update_stream0_per_window,
927 )
928 })
929 .or_else(|| {
930 flag(
931 "accumulated header size",
932 "h2.flood.violation.header_size_per_block",
933 self.accumulated_header_size,
934 self.config.max_header_list_size,
935 )
936 })
937 .or_else(|| {
938 flag(
939 "glitch",
940 "h2.flood.violation.glitch_window",
941 self.glitch_count,
942 self.config.max_glitch_count,
943 )
944 })
945 }
946
947 /// Reset CONTINUATION-specific counters when a header block is complete.
948 pub fn reset_continuation(&mut self) {
949 self.continuation_count = 0;
950 self.accumulated_header_size = 0;
951 }
952}
953
/// State of the [`ConnectionH2`] connection state machine.
///
/// `ConnectionH2::new` starts every connection in [`H2State::ClientPreface`].
///
/// NOTE(review): the per-variant notes below are inferred from variant names
/// and payload types only — confirm against the `readable()` / `writable()`
/// state machine before relying on them.
#[derive(Debug)]
pub enum H2State {
    /// Initial state; presumably the HTTP/2 connection preface exchange.
    ClientPreface,
    /// Presumably the client-side SETTINGS step of the handshake.
    ClientSettings,
    /// Presumably the server-side SETTINGS step of the handshake.
    ServerSettings,
    /// Presumably waiting for the next frame header.
    Header,
    /// Presumably processing the payload of the frame described by the
    /// carried [`FrameHeader`].
    Frame(FrameHeader),
    /// Presumably waiting for a CONTINUATION frame header while the carried
    /// [`Headers`] block is still incomplete.
    ContinuationHeader(Headers),
    /// Presumably reading a CONTINUATION frame payload for the carried
    /// [`Headers`] block.
    ContinuationFrame(Headers),
    /// Together with [`H2State::Error`], treated as a terminal state by
    /// `ConnectionH2::peer_gone_after_final_goaway`.
    GoAway,
    /// See [`H2State::GoAway`].
    Error,
    /// Presumably discarding the remaining payload of a rejected frame.
    Discard,
}
967
/// One endpoint's set of HTTP/2 SETTINGS parameters (RFC 9113 §6.5.2), plus
/// the RFC 8441 and RFC 9218 extension settings.
///
/// [`ConnectionH2`] keeps two copies: `local_settings` and `peer_settings`.
#[derive(Debug, Clone, Copy)]
pub struct H2Settings {
    /// `SETTINGS_HEADER_TABLE_SIZE` (HPACK dynamic table size, in octets).
    pub settings_header_table_size: u32,
    /// `SETTINGS_ENABLE_PUSH`.
    pub settings_enable_push: bool,
    /// `SETTINGS_MAX_CONCURRENT_STREAMS`.
    pub settings_max_concurrent_streams: u32,
    /// `SETTINGS_INITIAL_WINDOW_SIZE` (stream-level flow-control window).
    pub settings_initial_window_size: u32,
    /// `SETTINGS_MAX_FRAME_SIZE`.
    pub settings_max_frame_size: u32,
    /// `SETTINGS_MAX_HEADER_LIST_SIZE`.
    pub settings_max_header_list_size: u32,
    /// RFC 8441 (`SETTINGS_ENABLE_CONNECT_PROTOCOL`)
    pub settings_enable_connect_protocol: bool,
    /// RFC 9218 (`SETTINGS_NO_RFC7540_PRIORITIES`)
    pub settings_no_rfc7540_priorities: bool,
}
981
impl Default for H2Settings {
    /// Baseline settings built from this module's `DEFAULT_*` and
    /// `MAX_HEADER_LIST_SIZE` constants.
    ///
    /// Server push and the RFC 8441 extended CONNECT protocol start
    /// disabled; the RFC 9218 "no RFC 7540 priorities" flag starts set.
    fn default() -> Self {
        Self {
            settings_header_table_size: DEFAULT_HEADER_TABLE_SIZE,
            settings_enable_push: false,
            settings_max_concurrent_streams: DEFAULT_MAX_CONCURRENT_STREAMS,
            settings_initial_window_size: DEFAULT_INITIAL_WINDOW_SIZE,
            settings_max_frame_size: DEFAULT_MAX_FRAME_SIZE,
            // NOTE(review): `as u32` truncates silently if the constant ever
            // exceeds `u32::MAX` — confirm the constant's type and range.
            settings_max_header_list_size: MAX_HEADER_LIST_SIZE as u32,
            settings_enable_connect_protocol: false,
            settings_no_rfc7540_priorities: true,
        }
    }
}
996
/// RFC 9218 Extensible Priorities for HTTP stream scheduling.
///
/// Stores per-stream urgency (0-7, lower = more important) and incremental
/// flag. Used by `writable()` to sort streams: lower urgency first, then
/// stream ID for stability among same-urgency non-incremental streams.
///
/// Within a same-urgency bucket the scheduler (see
/// [`ConnectionH2::write_streams`]) drains non-incremental streams
/// sequentially, then applies RFC 9218 §4 round-robin to the incremental
/// streams starting from [`Self::incremental_cursor`], so multiple concurrent
/// downloads at the same urgency interleave their DATA frames fairly.
///
/// Streams without an explicit `priority` header get the RFC 9218 defaults:
/// urgency 3, incremental false.
///
/// The `Default` value is an empty priority map with the cursor at its
/// "no cursor yet" sentinel (`0`).
#[derive(Default)]
pub struct Prioriser {
    /// Per-stream priority: stream_id -> (urgency 0-7, incremental flag).
    /// Only RFC 9218 priorities are stored; the map is capped at
    /// `MAX_PRIORITIES` entries by [`Self::push_priority`].
    priorities: HashMap<StreamId, (u8, bool)>,
    /// RFC 9218 §4 round-robin cursor: stream ID that fired first in the
    /// last write pass over the incremental tail of the lowest-urgency
    /// bucket that contained at least one incremental stream. The next pass
    /// starts from the stream immediately after this ID (wrapping around),
    /// so a single slow-draining stream cannot hog the connection.
    ///
    /// `0` is the "no cursor yet" sentinel and means "start from the
    /// smallest ID in the bucket" — H2 stream IDs are always > 0.
    incremental_cursor: StreamId,
}
1025
/// RFC 9218 §4.1 default urgency value, returned by [`Prioriser::get`] for
/// streams that have no recorded priority.
const DEFAULT_URGENCY: u8 = 3;

/// Maximum entries in the priority map to prevent flooding via PRIORITY frames.
/// Enforced by [`Prioriser::push_priority`]: once the map is full, priorities
/// for streams not already tracked are ignored.
const MAX_PRIORITIES: usize = 4096;

/// Small look-ahead window (in stream IDs) for PRIORITY frames that arrive
/// slightly before the peer opens the corresponding stream. RFC 9218 allows
/// PRIORITY to be sent for an idle stream that the peer intends to open
/// soon. Past this budget we assume the ID will never be used and drop the
/// entry, preventing flooding with far-future stream IDs.
///
/// The accepted range is `last_stream_id < id <= last_stream_id + 64`
/// (saturating), see `Prioriser::is_acceptable`.
const PRIORITY_IDLE_LOOKAHEAD: u32 = 64;
1038
1039impl Prioriser {
1040 /// Record or update the priority for a stream that we know exists or are
1041 /// currently processing (used from pkawa's header-handling path where the
1042 /// owning stream's HEADERS frame is being decoded).
1043 ///
1044 /// Returns `true` if the priority is invalid (self-dependency for RFC 7540),
1045 /// signalling the caller should reset the stream with a protocol error.
1046 pub fn push_priority(&mut self, stream_id: StreamId, priority: parser::PriorityPart) -> bool {
1047 trace!(
1048 "{} PRIORITY REQUEST FOR {}: {:?}",
1049 log_module_context!(),
1050 stream_id,
1051 priority
1052 );
1053 // Cap the priority map to prevent flooding via PRIORITY frames
1054 if !self.priorities.contains_key(&stream_id) && self.priorities.len() >= MAX_PRIORITIES {
1055 return false;
1056 }
1057 match priority {
1058 parser::PriorityPart::Rfc7540 {
1059 stream_dependency,
1060 weight: _,
1061 } => {
1062 // RFC 9113 §5.3.1: a stream cannot depend on itself; signal
1063 // the caller to RST_STREAM with PROTOCOL_ERROR. Otherwise the
1064 // RFC 7540 priority tree is deprecated and silently ignored.
1065 stream_dependency.stream_id == stream_id
1066 }
1067 parser::PriorityPart::Rfc9218 {
1068 urgency,
1069 incremental,
1070 } => {
1071 // RFC 9218 §7.1: a malformed or out-of-range priority field
1072 // MUST be "treated as absent", NOT as a stream error. Clamping
1073 // an urgency > 7 to 7 is the policy-correct interpretation:
1074 // the field is still present (so defaulting would lose
1075 // information) but its value is normalised to the RFC's
1076 // allowed range [0..=7]. Intentionally not PROTOCOL_ERROR.
1077 self.priorities
1078 .insert(stream_id, (urgency.min(7), incremental));
1079 false
1080 }
1081 }
1082 }
1083
1084 /// Record or update the priority for a stream ID that arrived via a
1085 /// standalone PRIORITY frame.
1086 ///
1087 /// Pass 3 Medium #4: without this guard, a peer could send PRIORITY for
1088 /// arbitrary stream IDs (e.g. 2^31 ever-increasing IDs) and pin up to
1089 /// `MAX_PRIORITIES` entries of memory. Accept only:
1090 /// - an ID that corresponds to a currently-open stream (`open_streams`);
1091 /// - an idle ID slightly ahead of `last_stream_id` (within
1092 /// [`PRIORITY_IDLE_LOOKAHEAD`]), matching RFC 9218's "set priority for
1093 /// a stream about to be opened" pattern.
1094 ///
1095 /// IDs in the past that we do not currently track (already closed) and
1096 /// IDs too far in the future are silently dropped. The `MAX_PRIORITIES`
1097 /// ceiling is preserved as a defensive backstop if both filters are ever
1098 /// circumvented.
1099 ///
1100 /// Returns the same value semantics as [`Self::push_priority`].
1101 pub fn push_priority_guarded(
1102 &mut self,
1103 stream_id: StreamId,
1104 priority: parser::PriorityPart,
1105 last_stream_id: StreamId,
1106 open_streams: &HashMap<StreamId, GlobalStreamId>,
1107 ) -> bool {
1108 if !self.is_acceptable(stream_id, last_stream_id, open_streams) {
1109 trace!(
1110 "{} PRIORITY dropped for unknown/far stream {} (last_stream_id={})",
1111 log_module_context!(),
1112 stream_id,
1113 last_stream_id
1114 );
1115 return false;
1116 }
1117 self.push_priority(stream_id, priority)
1118 }
1119
1120 fn is_acceptable(
1121 &self,
1122 stream_id: StreamId,
1123 last_stream_id: StreamId,
1124 open_streams: &HashMap<StreamId, GlobalStreamId>,
1125 ) -> bool {
1126 if open_streams.contains_key(&stream_id) {
1127 return true;
1128 }
1129 // Idle stream ahead of the current counter: accept a small look-ahead.
1130 // Past IDs that are NOT in `open_streams` are closed — drop them.
1131 let upper = last_stream_id.saturating_add(PRIORITY_IDLE_LOOKAHEAD);
1132 stream_id > last_stream_id && stream_id <= upper
1133 }
1134
1135 /// Remove a stream's priority entry (called when the stream is recycled).
1136 pub fn remove(&mut self, stream_id: &StreamId) {
1137 self.priorities.remove(stream_id);
1138 }
1139
1140 /// Look up the priority for a stream, returning RFC 9218 defaults if absent.
1141 #[inline]
1142 pub fn get(&self, stream_id: &StreamId) -> (u8, bool) {
1143 self.priorities
1144 .get(stream_id)
1145 .copied()
1146 .unwrap_or((DEFAULT_URGENCY, false))
1147 }
1148
1149 /// Reorder a pre-sorted slice of writable stream IDs so that inside each
1150 /// urgency bucket, incremental streams appear after non-incremental ones,
1151 /// and the incremental tail is rotated by [`Self::incremental_cursor`]
1152 /// (RFC 9218 §4).
1153 ///
1154 /// The input `buf` must already be sorted by `(urgency, stream_id)`:
1155 /// this routine only partitions and rotates inside same-urgency
1156 /// contiguous runs, it does not re-sort.
1157 ///
1158 /// Returns the total number of incremental streams seen, so callers that
1159 /// need to update the cursor at the end of the write pass can early-exit
1160 /// when the count is zero.
1161 pub fn apply_incremental_rotation(&self, buf: &mut [StreamId]) -> usize {
1162 let mut total_incremental = 0usize;
1163 let mut i = 0;
1164 while i < buf.len() {
1165 let (urgency_i, _) = self.get(&buf[i]);
1166 let mut j = i + 1;
1167 while j < buf.len() {
1168 let (urgency_j, _) = self.get(&buf[j]);
1169 if urgency_j != urgency_i {
1170 break;
1171 }
1172 j += 1;
1173 }
1174 // `buf[i..j]` is a contiguous run of same-urgency stream IDs.
1175 let bucket = &mut buf[i..j];
1176 if bucket.len() > 1 {
1177 // Stable partition: non-incremental first, incremental last,
1178 // each subrange staying in ascending stream-id order.
1179 bucket.sort_by_key(|id| self.get(id).1);
1180 let split = bucket.partition_point(|id| !self.get(id).1);
1181 let incremental_tail = &mut bucket[split..];
1182 if incremental_tail.len() > 1 {
1183 // Rotate so the pass starts right after the stream that
1184 // fired first previously. `partition_point` returns the
1185 // first index whose stream ID > cursor (so cursor itself
1186 // is still drained, but after the streams ahead of it).
1187 let start =
1188 incremental_tail.partition_point(|id| *id <= self.incremental_cursor);
1189 incremental_tail.rotate_left(start);
1190 }
1191 total_incremental += incremental_tail.len();
1192 } else if bucket.len() == 1 && self.get(&bucket[0]).1 {
1193 total_incremental += 1;
1194 }
1195 i = j;
1196 }
1197 total_incremental
1198 }
1199
1200 /// Advance the RFC 9218 §4 round-robin cursor after a write pass.
1201 ///
1202 /// `first_incremental_fired` is the stream ID that headed the incremental
1203 /// tail we just drained; the next pass will start at the next stream
1204 /// after that ID. Callers may pass `None` when no incremental streams
1205 /// were eligible, leaving the cursor where it was.
1206 pub fn advance_incremental_cursor(&mut self, first_incremental_fired: Option<StreamId>) {
1207 if let Some(id) = first_incremental_fired {
1208 self.incremental_cursor = id;
1209 }
1210 }
1211}
1212
/// Connection-level flow control state (RFC 9113 §6.9).
///
/// `ConnectionH2::new` initialises `window` from
/// `DEFAULT_INITIAL_WINDOW_SIZE`, `received_bytes_since_update` to `0` and
/// `pending_window_updates` to an empty map.
pub struct H2FlowControl {
    /// Connection-level send window (can go negative per RFC 9113 §6.9.2),
    /// hence the signed type.
    pub window: i32,
    /// Bytes received since last connection-level WINDOW_UPDATE.
    pub received_bytes_since_update: u32,
    /// Queued stream_id -> accumulated increment for WINDOW_UPDATE frames (O(1) coalescing).
    pub pending_window_updates: HashMap<u32, u32>,
}
1222
/// Byte accounting for connection overhead attribution.
///
/// All three counters are plain byte counts.
pub struct H2ByteAccounting {
    /// Bytes read on the zero stream not yet attributed to a stream.
    pub zero_bytes_read: usize,
    /// Overhead bytes received (connection-level frames).
    pub overhead_bin: usize,
    /// Overhead bytes sent (connection-level frames).
    pub overhead_bout: usize,
}
1232
/// Connection draining state for graceful shutdown.
///
/// `draining` is the flag the close helpers consult (see
/// `ConnectionH2::frontend_hung_up_while_draining`); `started_at` and
/// `graceful_shutdown_deadline` together bound how long a proxy-initiated
/// drain may last.
pub struct H2DrainState {
    /// True when we've sent GOAWAY and are draining.
    pub draining: bool,
    /// Last stream ID from peer's GOAWAY (for retry decisions).
    /// `None` until a peer GOAWAY has been recorded.
    pub peer_last_stream_id: Option<StreamId>,
    /// Wall-clock timestamp captured the first time this connection entered
    /// `draining` during soft-stop. Used together with
    /// [`Self::graceful_shutdown_deadline`] to decide when to force-close.
    /// Remains `None` until the proxy-initiated drain begins (peer-initiated
    /// drains via `handle_goaway_frame` don't arm the forced-close timer —
    /// the caller in `Mux::shutting_down` is the only writer).
    pub started_at: Option<Instant>,
    /// Wall-clock budget granted to in-flight streams after the initial
    /// `GOAWAY(NO_ERROR)`. `None` means "wait indefinitely" (knob value `0`).
    /// Default when unset upstream: 5 s (see `L7ListenerHandler`).
    pub graceful_shutdown_deadline: Option<std::time::Duration>,
}
1251
/// Wire-side state of one HTTP/2 connection (RFC 9113), generic over the
/// socket type `Front`.
///
/// Owns the HPACK encoder/decoder, local and peer settings, connection-level
/// flow control, drain/GOAWAY state and the [`H2FloodDetector`]. Stream
/// storage itself lives in the sibling `Context`; `streams` only maps wire
/// stream IDs to [`GlobalStreamId`]s.
///
/// Instances are built by `ConnectionH2::new`; the [`Drop`] impl rebalances
/// the `h2.connection.*` gauges.
pub struct ConnectionH2<Front: SocketHandler> {
    /// Connection/session ULID propagated from the parent [`Mux`]. Used to
    /// stamp the session slot of the `[session req cluster backend]` log
    /// prefix emitted by this module's `log_context!` / `log_context_stream!`
    /// macros.
    pub session_ulid: Ulid,
    /// HPACK decoder. `new` caps its dynamic table at
    /// `local_settings.settings_header_table_size` (RFC 7541 §4.2).
    pub decoder: loona_hpack::Decoder<'static>,
    /// HPACK encoder.
    pub encoder: loona_hpack::Encoder<'static>,
    /// NOTE(review): presumably the stream the reader is currently filling
    /// and the number of bytes still expected — confirm against `readable()`.
    pub expect_read: Option<(H2StreamId, usize)>,
    /// NOTE(review): presumably the stream with a partially written frame —
    /// confirm against `writable()`. `None` right after construction.
    pub expect_write: Option<H2StreamId>,
    /// Starts at `0`. NOTE(review): presumably the last stream ID opened on
    /// this connection — confirm against the stream-creation path.
    pub last_stream_id: StreamId,
    /// Settings on our side of the connection. Built in `new` from
    /// [`H2Settings::default`] with `settings_max_concurrent_streams` taken
    /// from `connection_config.max_concurrent_streams`.
    pub local_settings: H2Settings,
    /// Settings attributed to the peer; starts as [`H2Settings::default`].
    pub peer_settings: H2Settings,
    /// Which side of the exchange this connection plays (e.g.
    /// `Position::Server` for a frontend).
    pub position: Position,
    /// RFC 9218 priority bookkeeping used to order stream writes.
    pub prioriser: Prioriser,
    /// Event-loop readiness (`interest` / `event` bits) for this socket.
    pub readiness: Readiness,
    /// The underlying socket.
    pub socket: Front,
    /// Current state of the connection state machine; starts in
    /// [`H2State::ClientPreface`].
    pub state: H2State,
    /// Wire stream ID -> [`GlobalStreamId`] for the streams tracked on this
    /// connection.
    pub streams: HashMap<StreamId, GlobalStreamId>,
    /// Timeout handle supplied by the caller of `new`.
    pub timeout_container: TimeoutContainer,
    /// Connection-level flow control state (send window, receive tracking, pending updates).
    pub flow_control: H2FlowControl,
    /// Highest stream ID accepted from the peer (used for GoAway last_stream_id).
    pub highest_peer_stream_id: StreamId,
    /// RFC 7541 §4.2 / §6.3 pending dynamic-table-size-update signal.
    ///
    /// `Some(new_size)` when a peer SETTINGS frame adjusted
    /// `SETTINGS_HEADER_TABLE_SIZE` and we have not yet prepended the
    /// matching `001xxxxx` HPACK directive to a header block. Consumed and
    /// cleared by [`H2BlockConverter::emit_pending_size_update_if_new_block`]
    /// on the next `Block::StatusLine` or `Block::Header` encoded for the
    /// connection. Until then the peer's decoder still has its previous
    /// (possibly larger) table cap, so emitting is a correctness
    /// requirement, not a nicety — see the RFC 9113 encoder-decoder
    /// synchronisation contract (§6.5.2).
    pub pending_table_size_update: Option<u32>,
    /// Reusable buffer for HPACK-encoded headers in the H2 block converter.
    pub converter_buf: Vec<u8>,
    /// Reusable buffer for lowercasing header keys in the H2 block converter.
    pub lowercase_buf: Vec<u8>,
    /// Reusable buffer for assembling cookie values in the H2 block converter.
    pub cookie_buf: Vec<u8>,
    /// Connection draining state for graceful shutdown.
    pub drain: H2DrainState,
    /// NOTE(review): presumably the kawa stream backing connection-level
    /// (stream 0) frames; its `storage` must be empty before the connection
    /// is considered fully flushed (see `peer_gone_after_final_goaway`).
    pub zero: GenericHttpStream,
    /// Byte accounting for connection overhead attribution.
    pub bytes: H2ByteAccounting,
    /// Flood detector for CVE mitigations (Rapid Reset, CONTINUATION, Ping, Settings floods).
    pub flood_detector: H2FloodDetector,
    /// RFC 9113 §6.5: timestamp when we sent SETTINGS and are awaiting ACK.
    /// If the peer does not ACK within SETTINGS_ACK_TIMEOUT, we send GOAWAY
    /// with SettingsTimeout error.
    pub settings_sent_at: Option<Instant>,
    /// Queued RST_STREAM frames to send: Vec<(stream_id, error_code)>.
    /// Used when refusing streams (MAX_CONCURRENT_STREAMS, buffer exhaustion)
    /// during readable — the actual write happens in the writable preamble
    /// to avoid conflicting with kawa.storage usage for frame payload discard.
    pub pending_rst_streams: Vec<(StreamId, H2Error)>,
    /// RFC 9113 §6.8: tracks stream IDs for which RST_STREAM has already been sent,
    /// preventing duplicate RST_STREAM frames on the wire.
    pub rst_sent: HashSet<StreamId>,
    /// Lifetime counter of RST_STREAM frames queued (pending + already flushed).
    /// Used to detect sustained misbehavior even when writable() drains the
    /// pending queue between readable() calls.
    pub total_rst_streams_queued: usize,
    /// Reusable buffer for priority-sorted stream IDs in write_streams().
    /// Cleared and reused each call to avoid per-frame allocation.
    priorities_buf: Vec<StreamId>,
    /// True once we've asked rustls to emit TLS close_notify for this frontend.
    close_notify_sent: bool,
    /// Per-listener H2 connection tuning (window size, max streams, shrink ratio).
    pub connection_config: H2ConnectionConfig,
    /// Maximum pending WINDOW_UPDATE entries before dropping.
    /// Derived from `connection_config.max_concurrent_streams` at construction.
    max_pending_window_updates: usize,
    /// Last `(connection_window, active_streams, pending_window_updates)` snapshot
    /// emitted by [`Self::gauge_connection_state`]. The snapshot represents this
    /// connection's *contribution* to the three `h2.connection.*` aggregate
    /// gauges; each call emits the signed delta against this snapshot via
    /// [`gauge_add!`] so the gauge sums across connections.
    ///
    /// Stays `None` until the first emission. [`Drop`] applies the negative of
    /// this snapshot so the connection's contribution is always rebalanced to
    /// zero on teardown — independent of which close path runs.
    last_gauge_snapshot: Option<(usize, usize, usize)>,
    /// Per-stream wall-clock timestamp of last meaningful activity (DATA or
    /// HEADERS frame receipt). Used to cancel streams that make no forward
    /// progress within [`Self::stream_idle_timeout`] — mitigates slow-multiplex
    /// Slowloris: connection-level idle timers reset on every frame, so a
    /// misbehaving peer can otherwise pin up to `max_concurrent_streams` slots
    /// for the full nominal connection timeout.
    ///
    /// Initialized when the stream is created and refreshed on each non-empty
    /// inbound DATA frame and on HEADERS for an existing stream (trailers).
    /// Empty DATA frames (CVE-2019-9518 vector) do NOT refresh the timer.
    pub stream_last_activity_at: HashMap<StreamId, Instant>,
    /// Per-stream idle cap. Streams with no activity for longer than this are
    /// RST_STREAM(CANCEL)'d by [`Self::cancel_timed_out_streams`].
    pub stream_idle_timeout: std::time::Duration,
    /// RFC 9113 §5.1.2 back-pressure: count of stream refusals
    /// (REFUSED_STREAM emitted via [`Self::refuse_stream_and_discard`]) within
    /// the current back-pressure window. When the count exceeds
    /// [`BACKPRESSURE_REFUSAL_THRESHOLD`] inside one
    /// [`BACKPRESSURE_WINDOW_DURATION`] we halve the advertised
    /// `SETTINGS_MAX_CONCURRENT_STREAMS` to signal the peer to slow down.
    refuse_count_window: u32,
    /// Start timestamp for the current back-pressure window.
    refuse_window_start: Instant,
    /// Set once we have halved `local_settings.settings_max_concurrent_streams`
    /// in response to a refusal burst. Prevents the cap from collapsing to 0
    /// on sustained abuse — a single halving per connection is sufficient to
    /// signal back-pressure; further bursts trigger `EnhanceYourCalm`.
    mcs_backpressure_applied: bool,
}
1366impl<Front: SocketHandler> std::fmt::Debug for ConnectionH2<Front> {
1367 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1368 f.debug_struct("ConnectionH2")
1369 .field("position", &self.position)
1370 .field("state", &self.state)
1371 .field("expect", &self.expect_read)
1372 .field("readiness", &self.readiness)
1373 .field("local_settings", &self.local_settings)
1374 .field("peer_settings", &self.peer_settings)
1375 .field("socket", &self.socket.socket_ref())
1376 .field("streams", &self.streams)
1377 .field("zero", &self.zero.storage.meter(20))
1378 .field("window", &self.flow_control.window)
1379 .field("total_rst_streams_queued", &self.total_rst_streams_queued)
1380 .finish()
1381 }
1382}
1383
/// Symmetric tear-down for the three `h2.connection.*` aggregate gauges:
/// whatever positive contribution this connection made via
/// [`ConnectionH2::gauge_connection_state`] is subtracted back out when the
/// connection is dropped.
///
/// Using `Drop` (rather than wiring decrements into every close path —
/// `graceful_goaway`, `force_disconnect`, `handle_goaway_frame`, `Mux::close`,
/// stream-id exhaustion, panic-unwind) is what guarantees the gauge is
/// arithmetically symmetric regardless of which path teardown took. Past
/// underflow incidents (commits a650ad69, d2f01ed4) have all been
/// missing-decrement bugs that `Drop` makes structurally impossible.
impl<Front: SocketHandler> Drop for ConnectionH2<Front> {
    /// Hands the gauge rebalancing to `release_connection_gauges`; nothing
    /// else is torn down explicitly here.
    fn drop(&mut self) {
        self.release_connection_gauges();
    }
}
1400
/// Identifies which stream a pending read or write on a [`ConnectionH2`]
/// refers to (see `expect_read` / `expect_write`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum H2StreamId {
    /// NOTE(review): presumably the connection-level stream 0, backed by
    /// `ConnectionH2::zero` — confirm against the read/write paths.
    Zero,
    /// Any other stream: `id` is the wire stream ID and `gid` the matching
    /// [`GlobalStreamId`] (the same pairing `ConnectionH2::streams` stores).
    Other { id: StreamId, gid: GlobalStreamId },
}
1406
1407impl<Front: SocketHandler> ConnectionH2<Front> {
1408 fn frontend_hung_up_while_draining(&self) -> bool {
1409 matches!(self.position, Position::Server)
1410 && self.drain.draining
1411 && (self.readiness.event.is_hup() || self.readiness.event.is_error())
1412 }
1413
1414 /// Once the final GOAWAY has been queued and all streams/control frames are
1415 /// gone, a peer-side HUP/ERR means any remaining rustls backlog is no
1416 /// longer deliverable. Waiting on `socket_wants_write()` in that state can
1417 /// deadlock shutdown forever because GOAWAY disables further frame reads.
1418 fn peer_gone_after_final_goaway(&self) -> bool {
1419 self.frontend_hung_up_while_draining()
1420 && matches!(self.state, H2State::GoAway | H2State::Error)
1421 && self.streams.is_empty()
1422 && self.expect_write.is_none()
1423 && self.zero.storage.is_empty()
1424 }
1425
1426 /// Shared constructor for both server and client H2 connections.
1427 ///
1428 /// Differences between server and client are captured by the caller-provided
1429 /// `position`, `expect_read`, and `readiness_interest` parameters.
1430 #[allow(clippy::too_many_arguments)]
1431 pub(super) fn new(
1432 session_ulid: Ulid,
1433 socket: Front,
1434 position: super::Position,
1435 pool: std::rc::Weak<std::cell::RefCell<crate::pool::Pool>>,
1436 flood_config: H2FloodConfig,
1437 connection_config: H2ConnectionConfig,
1438 stream_idle_timeout: std::time::Duration,
1439 graceful_shutdown_deadline: Option<std::time::Duration>,
1440 timeout_container: crate::timer::TimeoutContainer,
1441 expect_read: Option<(H2StreamId, usize)>,
1442 readiness_interest: sozu_command::ready::Ready,
1443 ) -> Option<Self> {
1444 let buffer = pool
1445 .upgrade()
1446 .and_then(|pool| pool.borrow_mut().checkout())?;
1447 let local_settings = H2Settings {
1448 settings_max_concurrent_streams: connection_config.max_concurrent_streams,
1449 ..H2Settings::default()
1450 };
1451 let mut decoder = loona_hpack::Decoder::new();
1452 // RFC 7541 §4.2: enforce SETTINGS_HEADER_TABLE_SIZE as the upper bound
1453 // for dynamic table size updates from the peer
1454 decoder.set_max_allowed_table_size(local_settings.settings_header_table_size as usize);
1455 Some(ConnectionH2 {
1456 session_ulid,
1457 decoder,
1458 encoder: loona_hpack::Encoder::new(),
1459 expect_read,
1460 expect_write: None,
1461 last_stream_id: 0,
1462 local_settings,
1463 peer_settings: H2Settings::default(),
1464 position,
1465 prioriser: Prioriser::default(),
1466 readiness: crate::Readiness {
1467 interest: readiness_interest,
1468 event: Ready::EMPTY,
1469 },
1470 socket,
1471 state: H2State::ClientPreface,
1472 streams: std::collections::HashMap::with_capacity(8),
1473 timeout_container,
1474 flow_control: H2FlowControl {
1475 window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
1476 received_bytes_since_update: 0,
1477 pending_window_updates: HashMap::new(),
1478 },
1479 highest_peer_stream_id: 0,
1480 pending_table_size_update: None,
1481 converter_buf: Vec::new(),
1482 lowercase_buf: Vec::new(),
1483 cookie_buf: Vec::new(),
1484 drain: H2DrainState {
1485 draining: false,
1486 peer_last_stream_id: None,
1487 started_at: None,
1488 graceful_shutdown_deadline,
1489 },
1490 zero: kawa::Kawa::new(kawa::Kind::Request, kawa::Buffer::new(buffer)),
1491 bytes: H2ByteAccounting {
1492 zero_bytes_read: 0,
1493 overhead_bin: 0,
1494 overhead_bout: 0,
1495 },
1496 flood_detector: H2FloodDetector::new(flood_config),
1497 settings_sent_at: None,
1498 pending_rst_streams: Vec::new(),
1499 rst_sent: std::collections::HashSet::new(),
1500 total_rst_streams_queued: 0,
1501 priorities_buf: Vec::new(),
1502 close_notify_sent: false,
1503 max_pending_window_updates: 1 + connection_config.max_concurrent_streams as usize * 4,
1504 connection_config,
1505 last_gauge_snapshot: None,
1506 stream_last_activity_at: HashMap::new(),
1507 stream_idle_timeout,
1508 refuse_count_window: 0,
1509 refuse_window_start: Instant::now(),
1510 mcs_backpressure_applied: false,
1511 })
1512 }
1513
1514 /// Start TLS close_notify on the frontend and keep the session alive until
1515 /// rustls has flushed the generated records.
1516 pub fn initiate_close_notify(&mut self) -> bool {
1517 if !self.position.is_server()
1518 || matches!(
1519 self.state,
1520 H2State::ClientPreface | H2State::ClientSettings | H2State::ServerSettings
1521 )
1522 {
1523 return false;
1524 }
1525 if !self.close_notify_sent {
1526 trace!("{} H2 initiating CLOSE_NOTIFY", log_context!(self));
1527 self.socket.socket_close();
1528 self.close_notify_sent = true;
1529 }
1530 if self.socket.socket_wants_write() {
1531 self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
1532 self.ensure_tls_flushed();
1533 true
1534 } else {
1535 false
1536 }
1537 }
1538
1539 fn expect_header(&mut self) {
1540 self.state = H2State::Header;
1541 self.expect_read = Some((H2StreamId::Zero, 9));
1542 }
1543
1544 /// Process the `H2State::Header` state: parse a 9-byte frame header from
1545 /// `self.zero`, validate the stream, create new streams if needed, and
1546 /// transition to `H2State::Frame` for the payload.
1547 ///
1548 /// Returns `MuxResult` — the caller should propagate the result directly.
1549 fn handle_header_state<L>(&mut self, context: &mut Context<L>) -> MuxResult
1550 where
1551 L: ListenerHandler + L7ListenerHandler,
1552 {
1553 let i = self.zero.storage.data();
1554 trace!("{} header: {:?}", log_context!(self), i);
1555 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
1556 Ok((_, header)) => {
1557 trace!("{} {:#?}", log_context!(self), header);
1558 self.zero.storage.clear();
1559 let stream_id = header.stream_id;
1560 // RFC 9113 §6.10: CONTINUATION frames MUST be preceded by a
1561 // HEADERS or PUSH_PROMISE frame without END_HEADERS. When we
1562 // reach `handle_header_state`, we are between frames and no
1563 // header block is in progress (otherwise the state would be
1564 // `H2State::ContinuationHeader`). A CONTINUATION frame arriving
1565 // here is therefore standalone and MUST be treated as a
1566 // connection error of type PROTOCOL_ERROR.
1567 if header.frame_type == FrameType::Continuation {
1568 error!(
1569 "{} standalone CONTINUATION frame on stream {} without preceding HEADERS",
1570 log_context!(self),
1571 stream_id
1572 );
1573 return self.goaway(H2Error::ProtocolError);
1574 }
1575 // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
1576 // Route unknown frames (and any stream_id == 0 control frame)
1577 // through stream 0 (the connection-level buffer) so
1578 // `handle_frame` can drop them without touching stream state.
1579 let read_stream = if stream_id == 0
1580 || matches!(header.frame_type, FrameType::Unknown(_))
1581 {
1582 H2StreamId::Zero
1583 } else if let Some(global_stream_id) = self.streams.get(&stream_id) {
1584 let allowed_on_half_closed = header.frame_type == FrameType::WindowUpdate
1585 || header.frame_type == FrameType::Priority
1586 || header.frame_type == FrameType::RstStream;
1587 let stream = &context.streams[*global_stream_id];
1588 // Use the position-aware end_of_stream flag:
1589 // - Server reads from front (client requests)
1590 // - Client reads from back (backend responses)
1591 let received_eos = if self.position.is_server() {
1592 stream.front_received_end_of_stream
1593 } else {
1594 stream.back_received_end_of_stream
1595 };
1596 trace!(
1597 "{} REQUESTING EXISTING STREAM {}: {}/{:?}",
1598 log_context!(self),
1599 stream_id,
1600 received_eos,
1601 stream.state
1602 );
1603 if !allowed_on_half_closed && (received_eos || !stream.state.is_open()) {
1604 error!(
1605 "{} CANNOT RECEIVE {:?} ON THIS STREAM {:?}",
1606 log_context!(self),
1607 header.frame_type,
1608 stream.state
1609 );
1610 return self.goaway(H2Error::StreamClosed);
1611 }
1612 // RFC 9113 §8.1: a HEADERS frame received in the body
1613 // phase is a trailer block and MUST carry END_STREAM. This
1614 // closes the request-smuggling primitive where a peer sends
1615 // HEADERS, DATA, HEADERS (no END_STREAM) to chain header
1616 // blocks on the same stream ID.
1617 //
1618 // Discriminate from the read-side Kawa parsing phase rather
1619 // than stream existence: on Position::Client the stream is
1620 // created when we send the request to the backend, so the
1621 // initial backend response HEADERS legitimately arrives on
1622 // an existing stream. Similarly, 1xx→final transitions on
1623 // either side may yield multiple HEADERS frames before the
1624 // body begins (kawa clears back to initial / terminated on
1625 // 1xx; neither is main_phase). Only HEADERS arriving once
1626 // the read side has transitioned to Body/Chunks parsing —
1627 // i.e. after headers were fully consumed and body framing
1628 // is in progress — may be a trailer.
1629 let read_in_body = if self.position.is_server() {
1630 stream.front.is_main_phase()
1631 } else {
1632 stream.back.is_main_phase()
1633 };
1634 if header.frame_type == FrameType::Headers
1635 && read_in_body
1636 && header.flags & parser::FLAG_END_STREAM == 0
1637 {
1638 error!(
1639 "{} HEADERS without END_STREAM on open stream {} in body phase: trailers MUST carry END_STREAM",
1640 log_context!(self),
1641 stream_id
1642 );
1643 return self.goaway(H2Error::ProtocolError);
1644 }
1645 if header.frame_type == FrameType::Data {
1646 H2StreamId::Other {
1647 id: stream_id,
1648 gid: *global_stream_id,
1649 }
1650 } else {
1651 H2StreamId::Zero
1652 }
1653 } else {
1654 // RFC 9113 §5.1.1: stream identifiers MUST be strictly
1655 // increasing. Tightened from `>=` to `>` so that a peer
1656 // cannot re-use `self.last_stream_id` (which would
1657 // conflict with our own server-pushed streams if we
1658 // ever enable push in the future). For the first
1659 // request on a fresh connection `last_stream_id == 0`
1660 // and any client-initiated odd stream still passes.
1661 if header.frame_type == FrameType::Headers
1662 && self.position.is_server()
1663 && stream_id & 1 == 1
1664 && stream_id > self.last_stream_id
1665 {
1666 // RFC 9113 §6.8: after sending a GOAWAY, the proxy
1667 // MUST NOT accept new streams.
1668 // `graceful_goaway` sets `drain.draining = true`
1669 // and sends an initial GOAWAY with last_stream_id =
1670 // STREAM_ID_MAX (so in-flight requests are still
1671 // accepted), but the contract for *new* peer-
1672 // initiated streams is that they must be refused.
1673 // Without this check, a peer racing the drain
1674 // window could open arbitrary new streams between
1675 // the initial and final GOAWAY emission.
1676 if self.drain.draining {
1677 if stream_id > self.highest_peer_stream_id {
1678 self.highest_peer_stream_id = stream_id;
1679 }
1680 return self.refuse_stream_and_discard(
1681 stream_id,
1682 H2Error::RefusedStream,
1683 header.payload_len,
1684 );
1685 }
1686 if self.streams.len()
1687 >= self.local_settings.settings_max_concurrent_streams as usize
1688 {
1689 error!(
1690 "{} MAX CONCURRENT STREAMS: limit={}, current={}",
1691 log_context!(self),
1692 self.local_settings.settings_max_concurrent_streams,
1693 self.streams.len()
1694 );
1695 // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
1696 // queueing RST_STREAM so GOAWAY reports the correct
1697 // last_stream_id if the connection closes later.
1698 if stream_id > self.highest_peer_stream_id {
1699 self.highest_peer_stream_id = stream_id;
1700 }
1701 return self.refuse_stream_and_discard(
1702 stream_id,
1703 H2Error::RefusedStream,
1704 header.payload_len,
1705 );
1706 }
1707 match self.create_stream(stream_id, context) {
1708 Some(_) => {}
1709 None => {
1710 // Buffer pool exhaustion is transient — refuse
1711 // this stream but keep the connection alive so
1712 // existing streams can complete and free buffers.
1713 error!(
1714 "{} Could not create stream {}: buffer pool exhausted",
1715 log_context!(self),
1716 stream_id
1717 );
1718 // RFC 9113 §6.8: update highest_peer_stream_id BEFORE
1719 // queueing RST_STREAM so GOAWAY reports the correct
1720 // last_stream_id if the connection closes later.
1721 if stream_id > self.highest_peer_stream_id {
1722 self.highest_peer_stream_id = stream_id;
1723 }
1724 return self.refuse_stream_and_discard(
1725 stream_id,
1726 H2Error::RefusedStream,
1727 header.payload_len,
1728 );
1729 }
1730 }
1731 } else if header.frame_type != FrameType::Priority {
1732 // Distinguish closed vs idle: check whether the stream
1733 // was previously opened. For Server position, compare
1734 // against highest_peer_stream_id (client-initiated).
1735 // For Client position, compare against last_stream_id
1736 // (our own initiated streams) since the peer never
1737 // initiates streams on a backend connection.
1738 let is_closed_stream = if self.position.is_server() {
1739 header.stream_id <= self.highest_peer_stream_id
1740 } else {
1741 header.stream_id < self.last_stream_id
1742 };
1743 if is_closed_stream {
1744 match header.frame_type {
1745 FrameType::RstStream | FrameType::WindowUpdate => {
1746 // RFC 9113 §5.1: RST_STREAM and WINDOW_UPDATE
1747 // on a closed stream can arrive due to race
1748 // conditions and should be consumed/discarded.
1749 debug!(
1750 "{} Ignoring {:?} on closed stream {}",
1751 log_context!(self),
1752 header.frame_type,
1753 header.stream_id
1754 );
1755 self.flood_detector.glitch_count += 1;
1756 check_flood_or_return!(self);
1757 }
1758 FrameType::Data => {
1759 // RFC 9113 §5.1: DATA on a closed stream is a
1760 // stream error of type STREAM_CLOSED. Queue
1761 // RST_STREAM (not GOAWAY) to preserve the
1762 // connection for other streams. The payload is
1763 // still routed through stream 0 so handle_frame
1764 // can do connection-level flow control accounting.
1765 debug!(
1766 "{} DATA on closed stream {}, sending RST_STREAM(STREAM_CLOSED)",
1767 log_context!(self),
1768 header.stream_id
1769 );
1770 self.flood_detector.glitch_count += 1;
1771 check_flood_or_return!(self);
1772 if let Some(result) =
1773 self.enqueue_rst(header.stream_id, H2Error::StreamClosed)
1774 {
1775 return result;
1776 }
1777 }
1778 _ => {
1779 // RFC 9113 §5.1: HEADERS or other frames on a
1780 // closed stream → connection error STREAM_CLOSED.
1781 error!(
1782 "{} Received {:?} on closed stream {}, sending GOAWAY(STREAM_CLOSED)",
1783 log_context!(self),
1784 header.frame_type,
1785 header.stream_id
1786 );
1787 return self.goaway(H2Error::StreamClosed);
1788 }
1789 }
1790 } else {
1791 error!(
1792 "{} Received {:?} on idle stream {}, sending GOAWAY(PROTOCOL_ERROR)",
1793 log_context!(self),
1794 header.frame_type,
1795 header.stream_id
1796 );
1797 return self.goaway(H2Error::ProtocolError);
1798 }
1799 }
1800 H2StreamId::Zero
1801 };
1802 trace!(
1803 "{} {} {:?} {:#?}",
1804 log_context!(self),
1805 header.stream_id,
1806 stream_id,
1807 self.streams
1808 );
1809 self.expect_read = Some((read_stream, header.payload_len as usize));
1810 self.state = H2State::Frame(header);
1811 }
1812 Err(error) => {
1813 let error = error_nom_to_h2(error);
1814 error!("{} COULD NOT PARSE FRAME HEADER", log_context!(self));
1815 return self.goaway(error);
1816 }
1817 };
1818 MuxResult::Continue
1819 }
1820
1821 /// Process the `H2State::ContinuationHeader` state: parse a CONTINUATION
1822 /// frame header from `self.zero`, validate stream ID continuity, track
1823 /// flood detection counters, and transition to `ContinuationFrame`.
1824 ///
1825 /// The `headers` parameter is the accumulated HEADERS context from the
1826 /// initial HEADERS frame (cloned from the state enum to avoid borrow
1827 /// conflicts).
1828 fn handle_continuation_header_state(&mut self, headers: &Headers) -> MuxResult {
1829 let i = self.zero.storage.unparsed_data();
1830 trace!("{} continuation header: {:?}", log_context!(self), i);
1831 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
1832 Ok((
1833 _,
1834 FrameHeader {
1835 payload_len,
1836 frame_type: FrameType::Continuation,
1837 flags,
1838 stream_id,
1839 },
1840 )) => {
1841 if self.zero.storage.end < 9 {
1842 error!(
1843 "{} CONTINUATION header: storage.end ({}) too small to remove frame header",
1844 log_context!(self),
1845 self.zero.storage.end
1846 );
1847 return self.goaway(H2Error::InternalError);
1848 }
1849 self.zero.storage.end -= 9;
1850 if stream_id != headers.stream_id {
1851 error!(
1852 "{} CONTINUATION stream_id {} does not match HEADERS stream_id {}",
1853 log_context!(self),
1854 stream_id,
1855 headers.stream_id
1856 );
1857 return self.goaway(H2Error::ProtocolError);
1858 }
1859 // CVE-2024-27316: track CONTINUATION frame count and accumulated size
1860 self.flood_detector.continuation_count += 1;
1861 self.flood_detector.accumulated_header_size = self
1862 .flood_detector
1863 .accumulated_header_size
1864 .saturating_add(payload_len);
1865 check_flood_or_return!(self);
1866 // RFC 9113 §10.5.1: reject header blocks that cannot be
1867 // buffered. Previously we silently removed READABLE interest
1868 // when amount > available_space, stalling the connection.
1869 // If the payload still fits in our zero buffer we can refuse
1870 // just this stream (RST_STREAM + drain); if not, the
1871 // connection can no longer decode header blocks safely and we
1872 // escalate to GOAWAY(EnhanceYourCalm).
1873 if self.flood_detector.accumulated_header_size
1874 > self.flood_detector.config.max_header_list_size
1875 {
1876 error!(
1877 "{} CONTINUATION accumulated header size {} exceeds {}",
1878 log_context!(self),
1879 self.flood_detector.accumulated_header_size,
1880 self.flood_detector.config.max_header_list_size
1881 );
1882 if (payload_len as usize) > self.zero.storage.available_space() {
1883 return self.goaway(H2Error::EnhanceYourCalm);
1884 }
1885 // Remove the already-created stream slot before refusing,
1886 // so it does not leak against MAX_CONCURRENT_STREAMS. Route
1887 // through `remove_dead_stream` so the expect_write/read
1888 // invariant (§LIFECYCLE.md 5.4) holds on this path too.
1889 if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
1890 self.remove_dead_stream(stream_id, global_stream_id);
1891 }
1892 return self.refuse_stream_and_discard(
1893 stream_id,
1894 H2Error::RefusedStream,
1895 payload_len,
1896 );
1897 }
1898 if (payload_len as usize) > self.zero.storage.available_space() {
1899 error!(
1900 "{} CONTINUATION payload {} exceeds buffer space {}",
1901 log_context!(self),
1902 payload_len,
1903 self.zero.storage.available_space()
1904 );
1905 return self.goaway(H2Error::EnhanceYourCalm);
1906 }
1907 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
1908 let mut headers = headers.clone();
1909 headers.end_headers = flags & parser::FLAG_END_HEADERS != 0;
1910 headers.header_block_fragment.len = headers
1911 .header_block_fragment
1912 .len
1913 .saturating_add(payload_len);
1914 self.state = H2State::ContinuationFrame(headers);
1915 }
1916 Err(error) => {
1917 let error = error_nom_to_h2(error);
1918 error!("{} COULD NOT PARSE CONTINUATION HEADER", log_context!(self));
1919 return self.goaway(error);
1920 }
1921 other => {
1922 error!(
1923 "{} UNEXPECTED {:?} WHILE PARSING CONTINUATION HEADER",
1924 log_context!(self),
1925 other
1926 );
1927 return self.goaway(H2Error::ProtocolError);
1928 }
1929 };
1930 MuxResult::Continue
1931 }
1932
1933 pub fn readable<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
1934 where
1935 E: Endpoint,
1936 L: ListenerHandler + L7ListenerHandler,
1937 {
1938 self.prune_inactive_streams_while_closing(context);
1939 // Pass 4 Medium #3: per-stream idle guard. Slow-multiplex Slowloris
1940 // sends one byte or a control frame per stream just often enough to
1941 // reset the connection-level timer; per-stream deadlines catch it.
1942 self.cancel_timed_out_streams(context, &mut endpoint);
1943
1944 // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
1945 if let Some(sent_at) = self.settings_sent_at {
1946 if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
1947 warn!(
1948 "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
1949 log_context!(self),
1950 SETTINGS_ACK_TIMEOUT
1951 );
1952 return self.goaway(H2Error::SettingsTimeout);
1953 }
1954 }
1955
1956 // Don't reset the timeout unconditionally here. Only application data
1957 // (DATA/HEADERS frames) should reset the timeout. H2 control frames
1958 // (PING, WINDOW_UPDATE, SETTINGS) must NOT reset it, otherwise a peer
1959 // sending periodic PINGs prevents timeout detection on stuck sessions.
1960 // The timeout is reset:
1961 // - Below, when reading DATA payload (H2StreamId::Other)
1962 // - In handle_frame(), when processing HEADERS frames
1963 let (stream_id, kawa) = if let Some((stream_id, amount)) = self.expect_read {
1964 let (kawa, did) = match stream_id {
1965 H2StreamId::Zero => (&mut self.zero, usize::MAX),
1966 H2StreamId::Other {
1967 gid: global_stream_id,
1968 ..
1969 } => {
1970 // Reading DATA frame payload for an application stream.
1971 // This is real application activity — reset the timeout.
1972 self.timeout_container.reset();
1973 (
1974 context.streams[global_stream_id]
1975 .split(&self.position)
1976 .rbuffer,
1977 global_stream_id,
1978 )
1979 }
1980 };
1981 trace!(
1982 "{} {:?}({:?}, {})",
1983 log_context!(self),
1984 self.state,
1985 stream_id,
1986 amount
1987 );
1988 if amount > 0 {
1989 if amount > kawa.storage.available_space() {
1990 self.readiness.interest.remove(Ready::READABLE);
1991 return MuxResult::Continue;
1992 }
1993 let (size, status) = self.socket.socket_read(&mut kawa.storage.space()[..amount]);
1994 context.debug.push(DebugEvent::SocketIO(0, did, size));
1995 kawa.storage.fill(size);
1996 self.position.count_bytes_in_counter(size);
1997 self.bytes.zero_bytes_read += size;
1998 if update_readiness_after_read(size, status, &mut self.readiness) {
1999 if matches!(self.position, Position::Server)
2000 && self.drain.draining
2001 && matches!(status, SocketResult::Closed | SocketResult::Error)
2002 {
2003 // During graceful drain, a frontend EOF/HUP means no
2004 // further frame headers or payload bytes can arrive.
2005 // Keeping expect_read here strands the connection in
2006 // Header/Frame forever even after the peer is gone.
2007 self.expect_read = None;
2008 }
2009 return MuxResult::Continue;
2010 } else if size == amount {
2011 self.expect_read = None;
2012 } else {
2013 self.expect_read = Some((stream_id, amount - size));
2014 if let (H2State::ClientPreface, Position::Server) =
2015 (&self.state, &self.position)
2016 {
2017 let i = kawa.storage.data();
2018 if !b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n".starts_with(i) {
2019 debug!("{} EARLY INVALID PREFACE: {:?}", log_context!(self), i);
2020 return self.force_disconnect();
2021 }
2022 }
2023 return MuxResult::Continue;
2024 }
2025 } else {
2026 self.expect_read = None;
2027 }
2028 (stream_id, kawa)
2029 } else {
2030 self.readiness.event.remove(Ready::READABLE);
2031 return MuxResult::Continue;
2032 };
2033 match (&self.state, &self.position) {
2034 (H2State::Error, _)
2035 | (H2State::GoAway, _)
2036 | (H2State::ServerSettings, Position::Server)
2037 | (H2State::ClientPreface, Position::Client(..))
2038 | (H2State::ClientSettings, Position::Client(..)) => {
2039 error!(
2040 "{} Unexpected combination: (Readable, {:?}, {:?})",
2041 log_context!(self),
2042 self.state,
2043 self.position
2044 );
2045 return self.force_disconnect();
2046 }
2047 (H2State::Discard, _) => {
2048 let _i = kawa.storage.data();
2049 trace!("{} DISCARDING: {:?}", log_context!(self), _i);
2050 kawa.storage.clear();
2051 self.attribute_bytes_to_overhead();
2052 self.expect_header();
2053 }
2054 (H2State::ClientPreface, Position::Server) => {
2055 let i = kawa.storage.data();
2056 let i = match parser::preface(i) {
2057 Ok((i, _)) => i,
2058 Err(_) => return self.force_disconnect(),
2059 };
2060 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2061 Ok((
2062 _,
2063 FrameHeader {
2064 payload_len,
2065 frame_type: FrameType::Settings,
2066 flags: 0,
2067 stream_id: 0,
2068 },
2069 )) => {
2070 kawa.storage.clear();
2071 self.state = H2State::ClientSettings;
2072 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2073 }
2074 _ => return self.force_disconnect(),
2075 };
2076 }
2077 (H2State::ClientSettings, Position::Server) => {
2078 let i = kawa.storage.data();
2079 let settings = match parser::settings_frame(
2080 i,
2081 &FrameHeader {
2082 payload_len: i.len() as u32,
2083 frame_type: FrameType::Settings,
2084 flags: 0,
2085 stream_id: 0,
2086 },
2087 ) {
2088 Ok((_, settings)) => {
2089 kawa.storage.clear();
2090 settings
2091 }
2092 Err(_) => return self.force_disconnect(),
2093 };
2094 let kawa = &mut self.zero;
2095 match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
2096 Ok((_, size)) => {
2097 kawa.storage.fill(size);
2098 incr!(names::h2::FRAMES_TX_SETTINGS);
2099 // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
2100 self.settings_sent_at = Some(Instant::now());
2101 }
2102 Err(error) => {
2103 error!(
2104 "{} Could not serialize SettingsFrame: {:?}",
2105 log_context!(self),
2106 error
2107 );
2108 return self.force_disconnect();
2109 }
2110 };
2111
2112 self.state = H2State::ServerSettings;
2113 self.expect_write = Some(H2StreamId::Zero);
2114 self.readiness.signal_pending_write();
2115 return self.handle_frame(settings, 0, context, endpoint);
2116 }
2117 (H2State::ServerSettings, Position::Client(..)) => {
2118 let i = kawa.storage.data();
2119 match parser::frame_header(i, self.local_settings.settings_max_frame_size) {
2120 Ok((
2121 _,
2122 header @ FrameHeader {
2123 payload_len,
2124 frame_type: FrameType::Settings,
2125 flags: 0,
2126 stream_id: 0,
2127 },
2128 )) => {
2129 kawa.storage.clear();
2130 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
2131 self.state = H2State::Frame(header)
2132 }
2133 _ => return self.force_disconnect(),
2134 };
2135 }
2136 (H2State::Header, _) => {
2137 return self.handle_header_state(context);
2138 }
2139 (H2State::ContinuationHeader(headers), _) => {
2140 let headers = headers.clone();
2141 return self.handle_continuation_header_state(&headers);
2142 }
2143 (H2State::Frame(header), _) => {
2144 let i = kawa.storage.unparsed_data();
2145 trace!("{} data: {:?}", log_context!(self), i);
2146 let wire_payload_len = header.payload_len;
2147 let frame = match parser::frame_body(i, header) {
2148 Ok((_, frame)) => frame,
2149 Err(error) => {
2150 let error = error_nom_to_h2(error);
2151 error!("{} COULD NOT PARSE FRAME BODY", log_context!(self));
2152 return self.goaway(error);
2153 }
2154 };
2155 if let H2StreamId::Zero = stream_id {
2156 if header.frame_type == FrameType::Headers {
2157 kawa.storage.head = kawa.storage.end;
2158 } else {
2159 kawa.storage.end = kawa.storage.head;
2160 }
2161 }
2162 self.expect_header();
2163 return self.handle_frame(frame, wire_payload_len, context, endpoint);
2164 }
2165 (H2State::ContinuationFrame(headers), _) => {
2166 kawa.storage.head = kawa.storage.end;
2167 let i = kawa.storage.data();
2168 trace!("{} data: {:?}", log_context!(self), i);
2169 let headers = headers.clone();
2170 self.expect_header();
2171 return self.handle_frame(Frame::Headers(headers), 0, context, endpoint);
2172 }
2173 }
2174 MuxResult::Continue
2175 }
2176
2177 /// Update the H2 connection-level *aggregate* gauges with this connection's
2178 /// current contribution, expressed as a signed delta against the last
2179 /// snapshot we emitted.
2180 ///
2181 /// The three metrics are emitted via [`gauge_add!`] (lifecycle deltas) so
2182 /// that the dashboard sees the **sum across all live H2 connections**:
2183 ///
2184 /// - `h2.connection.window_bytes` — sum of available connection-level
2185 /// send-window bytes. Negative per-connection windows clamp to 0 so the
2186 /// aggregate represents only available capacity, not deficit.
2187 /// - `h2.connection.active_streams` — sum of in-flight streams across
2188 /// every H2 connection.
2189 /// - `h2.connection.pending_window_updates` — sum of queued (un-flushed)
2190 /// per-stream WINDOW_UPDATE entries across every H2 connection.
2191 ///
2192 /// Called from the write hot path; emits nothing when the snapshot is
2193 /// unchanged so the steady state stays cheap. The paired decrement for
2194 /// every increment is provided by [`Drop`], which subtracts the final
2195 /// snapshot when the connection is dropped — keeping the aggregate
2196 /// arithmetically symmetric independent of which close path runs
2197 /// (`graceful_goaway`, `force_disconnect`, `handle_goaway_frame`,
2198 /// `Mux::close`, panic-unwind, …).
2199 fn gauge_connection_state(&mut self) {
2200 let snapshot = (
2201 self.flow_control.window.max(0) as usize,
2202 self.streams.len(),
2203 self.flow_control.pending_window_updates.len(),
2204 );
2205 if self.last_gauge_snapshot == Some(snapshot) {
2206 return;
2207 }
2208 let prev = self.last_gauge_snapshot.unwrap_or((0, 0, 0));
2209 // Diff in i64 — usize cannot represent the negative side of the delta.
2210 let dw = snapshot.0 as i64 - prev.0 as i64;
2211 let ds = snapshot.1 as i64 - prev.1 as i64;
2212 let du = snapshot.2 as i64 - prev.2 as i64;
2213 if dw != 0 {
2214 gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, dw);
2215 }
2216 if ds != 0 {
2217 gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, ds);
2218 }
2219 if du != 0 {
2220 gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, du);
2221 }
2222 self.last_gauge_snapshot = Some(snapshot);
2223 }
2224
2225 /// Subtract this connection's contribution from the three aggregate
2226 /// `h2.connection.*` gauges. Idempotent: clears `last_gauge_snapshot` so a
2227 /// second call (or a [`Drop`] on top of an explicit reset) is a no-op.
2228 ///
2229 /// Pairs with every prior call to [`Self::gauge_connection_state`]; called
2230 /// from [`Drop`] so the symmetry is guaranteed regardless of the close
2231 /// path.
2232 fn release_connection_gauges(&mut self) {
2233 if let Some((w, s, u)) = self.last_gauge_snapshot.take() {
2234 if w != 0 {
2235 gauge_add!(names::h2::CONNECTION_WINDOW_BYTES, -(w as i64));
2236 }
2237 if s != 0 {
2238 gauge_add!(names::h2::CONNECTION_ACTIVE_STREAMS, -(s as i64));
2239 }
2240 if u != 0 {
2241 gauge_add!(names::h2::CONNECTION_PENDING_WINDOW_UPDATES, -(u as i64));
2242 }
2243 }
2244 }
2245
2246 /// Write application data (request/response bodies, headers) across all
2247 /// active streams, respecting priority ordering and flow control.
2248 ///
2249 /// This is the main data-plane write path: it resumes any partially-written
2250 /// stream, prepares new frames via the H2 block converter, flushes them to
2251 /// the socket, and recycles completed streams.
2252 ///
2253 /// NOTE: The priority iteration loop and converter setup remain inline here
2254 /// because the converter borrows `self.encoder`, preventing further
2255 /// decomposition into `&mut self` methods within the loop body.
2256 fn write_streams<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E) -> MuxResult
2257 where
2258 E: Endpoint,
2259 L: ListenerHandler + L7ListenerHandler,
2260 {
2261 self.timeout_container.reset();
2262 // Pre-compute byte totals for proportional overhead distribution.
2263 let byte_totals = self.compute_stream_byte_totals(context);
2264 let mut io_slices: Vec<IoSlice<'static>> = Vec::new();
2265
2266 if let Some(
2267 write_stream @ H2StreamId::Other {
2268 id: stream_id,
2269 gid: global_stream_id,
2270 },
2271 ) = self.expect_write
2272 {
2273 let stream = &mut context.streams[global_stream_id];
2274 let stream_state = stream.state;
2275 let parts = stream.split(&self.position);
2276 let kawa = parts.wbuffer;
2277 // Resume path: if the same stream is parked waiting for buffer
2278 // space (expect_read matches write_stream), pass the amount so
2279 // flush_stream_out can re-enable READABLE as soon as we drain.
2280 let cross_read_amount = match self.expect_read {
2281 Some((read_stream, amount)) if write_stream == read_stream => Some(amount),
2282 _ => None,
2283 };
2284 let mut resume_bytes: usize = 0;
2285 let outcome = Self::flush_stream_out(
2286 &mut self.socket,
2287 kawa,
2288 parts.metrics,
2289 &self.position,
2290 &mut self.readiness,
2291 &mut context.debug,
2292 2,
2293 global_stream_id,
2294 None,
2295 cross_read_amount,
2296 &mut io_slices,
2297 Some(&mut resume_bytes),
2298 );
2299 // Refresh the per-stream idle timer when outbound bytes move: a
2300 // large response delivered at low bandwidth is "active", not idle,
2301 // even when the peer sends no inbound frames.
2302 if resume_bytes > 0 {
2303 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2304 *t = Instant::now();
2305 }
2306 }
2307 if outcome == FlushOutcome::Stalled {
2308 return MuxResult::Continue;
2309 }
2310 self.expect_write = None;
2311 if (kawa.is_terminated() || kawa.is_error())
2312 && kawa.is_completed()
2313 && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2314 {
2315 let (client_rtt, server_rtt) = Self::snapshot_rtts(
2316 &self.position,
2317 &self.socket,
2318 &endpoint,
2319 stream.linked_token(),
2320 );
2321
2322 if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2323 &self.position,
2324 &mut self.bytes,
2325 &self.streams,
2326 stream,
2327 global_stream_id,
2328 stream_id,
2329 byte_totals,
2330 &mut context.debug,
2331 context.listener.clone(),
2332 client_rtt,
2333 server_rtt,
2334 ) {
2335 // Remove the recycled stream from the connection maps
2336 // before endpoint.end_stream() can trigger teardown.
2337 // Otherwise session close can observe a stale `Recycle`
2338 // entry in self.streams and mis-handle the connection as
2339 // if it still had an active H2 stream.
2340 self.remove_dead_stream(dead_id, global_stream_id);
2341 if let Some(token) = token {
2342 remove_backend_stream(
2343 &mut context.backend_streams,
2344 token,
2345 global_stream_id,
2346 );
2347 endpoint.end_stream(token, global_stream_id, context);
2348 }
2349 }
2350 }
2351 }
2352
2353 self.gauge_connection_state();
2354
2355 let scheme: &'static [u8] = if context.listener.borrow().protocol() == Protocol::HTTPS {
2356 b"https"
2357 } else {
2358 b"http"
2359 };
2360 let mut completed_streams = Vec::new();
2361 let mut converter_buf = std::mem::take(&mut self.converter_buf);
2362 converter_buf.clear();
2363 let mut converter = converter::H2BlockConverter {
2364 max_frame_size: self.peer_settings.settings_max_frame_size as usize,
2365 window: 0,
2366 stream_id: 0,
2367 encoder: &mut self.encoder,
2368 out: converter_buf,
2369 scheme,
2370 lowercase_buf: std::mem::take(&mut self.lowercase_buf),
2371 cookie_buf: std::mem::take(&mut self.cookie_buf),
2372 // When this connection is a backend client we are writing
2373 // toward the upstream backend — flow-control stalls in that
2374 // direction are scoped to `backend.flow_control.paused` (in
2375 // addition to the existing direction-agnostic
2376 // `h2.flow_control_stall`).
2377 position_is_client: self.position.is_client(),
2378 // RFC 9218 §4: toggled per-stream in the loop below, driven by
2379 // `Prioriser::get(stream_id).1`. Non-incremental by default so
2380 // unit tests and non-scheduled callers (e.g. the resume path
2381 // above) keep the sequential semantics.
2382 incremental_mode: false,
2383 // Populated once per write pass from `apply_incremental_rotation`
2384 // below. The converter uses `incremental_peer_count <= 1` to skip
2385 // the RFC 9218 yield-after-one-DATA behaviour when there is no
2386 // peer to interleave with (solo-bucket fast path).
2387 incremental_peer_count: 0,
2388 // RFC 7541 §6.3: move the pending size-update onto the converter
2389 // so the first header block of this pass prepends the signal.
2390 // We clear the connection-side mirror only AFTER the write pass
2391 // confirms emission via `converter.size_update_emitted`, so a
2392 // DATA-only write pass (no header block) does not drop the
2393 // signal.
2394 pending_table_size_update: self.pending_table_size_update,
2395 size_update_emitted: false,
2396 // Reset on every write pass; `check_header_capacity` flips it
2397 // mid-call and `finalize` commits the abort by flipping
2398 // `kawa.parsing_phase` to Error so the next pass emits
2399 // RST_STREAM(InternalError).
2400 pending_oversized_abort: false,
2401 };
2402 self.priorities_buf.clear();
2403 self.priorities_buf.extend(self.streams.keys().copied());
2404 // RFC 9218 §4 primary sort: ascending urgency, then stream ID for
2405 // stability. The incremental flag is handled by
2406 // `apply_incremental_rotation` below so it does not perturb the
2407 // non-incremental fast path.
2408 self.priorities_buf.sort_by_cached_key(|id| {
2409 let (urgency, _) = self.prioriser.get(id);
2410 (urgency, *id)
2411 });
2412 // RFC 9218 §4: inside each urgency bucket, move incremental streams
2413 // to the tail and rotate them by the per-connection round-robin
2414 // cursor so no single slow-draining stream can starve its
2415 // same-urgency incremental peers.
2416 let incremental_count = self
2417 .prioriser
2418 .apply_incremental_rotation(&mut self.priorities_buf);
2419
2420 // RFC 9218 §4 refinement (Tier 3a): the connection-global
2421 // `incremental_count` is too coarse for `converter.incremental_peer_count`.
2422 // A solo `u=0, i` stream with an unrelated `u=7, i` peer in a
2423 // different urgency bucket would still see `incremental_peer_count > 1`
2424 // and voluntarily yield — stranding bytes the invariant-15/16 guards
2425 // were meant to prevent. Scope the count to same-urgency streams that
2426 // are actually ready to emit this pass (eligibility mirrors the check
2427 // in the write loop below).
2428 let mut ready_incremental_by_urgency: HashMap<u8, usize> = HashMap::new();
2429 for &sid in self.priorities_buf.iter() {
2430 let (urgency, is_incremental) = self.prioriser.get(&sid);
2431 if !is_incremental {
2432 continue;
2433 }
2434 let Some(&gid) = self.streams.get(&sid) else {
2435 continue;
2436 };
2437 let wbuffer = match self.position {
2438 Position::Server => &context.streams[gid].back,
2439 Position::Client(..) => &context.streams[gid].front,
2440 };
2441 if wbuffer.is_main_phase()
2442 || (wbuffer.is_terminated() && !wbuffer.is_completed())
2443 || (wbuffer.is_error() && !self.rst_sent.contains(&sid))
2444 {
2445 *ready_incremental_by_urgency.entry(urgency).or_insert(0) += 1;
2446 }
2447 }
2448
2449 trace!(
2450 "{} PRIORITIES: {:?} (incremental_count={}, per_bucket={:?})",
2451 log_context!(self),
2452 self.priorities_buf,
2453 incremental_count,
2454 ready_incremental_by_urgency
2455 );
2456 let mut socket_write = false;
2457 // RFC 9218 §4 round-robin: remember the first incremental stream we
2458 // served this pass so we can advance `Prioriser::incremental_cursor`
2459 // to it, causing the next pass to start with the stream just after.
2460 let mut first_incremental_fired: Option<StreamId> = None;
2461 // Total outbound bytes emitted across all stream flushes this pass —
2462 // `finalize_write` uses this to distinguish a voluntary scheduler
2463 // yield (progress + pending back-buffer, LIFECYCLE §9 invariant 16)
2464 // from a no-progress wait state (e.g. flow-control starvation).
2465 let mut total_bytes_written: usize = 0;
2466 // Collect every fresh RST_STREAM emitted via the converter
2467 // (`initialize` chokepoint or the HPACK over-budget abort path)
2468 // so we can run `account_emitted_rst` for each one AFTER the
2469 // converter is dropped — the converter holds `&mut self.encoder`
2470 // for the loop body so we cannot take `&mut self` until then.
2471 let mut freshly_emitted_rsts: Vec<H2Error> = Vec::new();
2472 'outer: for idx in 0..self.priorities_buf.len() {
2473 let stream_id = self.priorities_buf[idx];
2474 let Some(&global_stream_id) = self.streams.get(&stream_id) else {
2475 error!(
2476 "{} stream_id {} from sorted keys missing in streams map",
2477 log_context!(self),
2478 stream_id
2479 );
2480 continue;
2481 };
2482 let (urgency, is_incremental) = self.prioriser.get(&stream_id);
2483 let stream = &mut context.streams[global_stream_id];
2484 let stream_state = stream.state;
2485 let parts = stream.split(&self.position);
2486 let kawa = parts.wbuffer;
2487 if kawa.is_main_phase()
2488 || (kawa.is_terminated() && !kawa.is_completed())
2489 || (kawa.is_error() && !self.rst_sent.contains(&stream_id))
2490 {
2491 let window = min(*parts.window, self.flow_control.window);
2492 converter.window = window;
2493 converter.stream_id = stream_id;
2494 // RFC 9218 §4: incremental streams yield the converter after
2495 // a single DATA frame so same-urgency peers interleave.
2496 converter.incremental_mode = is_incremental;
2497 // Same-urgency-bucket ready-peer count (Tier 3a, LIFECYCLE §9
2498 // invariant 17). The converter skips the yield when there is
2499 // no peer in the same bucket to interleave with — prevents
2500 // the `finalize_write` WRITABLE-withdrawal strand (see
2501 // `test_h2_solo_incremental_drains_fully`). A connection-wide
2502 // count would wrongly yield for a solo incremental stream
2503 // when another urgency bucket happens to contain an
2504 // incremental peer.
2505 converter.incremental_peer_count = ready_incremental_by_urgency
2506 .get(&urgency)
2507 .copied()
2508 .unwrap_or(0);
2509 // Track RST_STREAM dedup: if kawa is in error state, the converter
2510 // will generate a RST_STREAM frame via `initialize`. Mark it so we
2511 // don't send a duplicate on the next writable cycle.
2512 if kawa.is_error() {
2513 let freshly_rst = self.rst_sent.insert(stream_id);
2514 // LIFECYCLE §9 invariant 17: any transition to ineligible
2515 // mid-pass MUST decrement ready_incremental_by_urgency so
2516 // later streams in the same 'outer iteration see the live
2517 // count, not the snapshot. Missing this costs one voluntary
2518 // yield per same-urgency peer that trails the RST.
2519 if freshly_rst && is_incremental {
2520 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2521 *c = c.saturating_sub(1);
2522 }
2523 }
2524 // Account for the RST that `initialize` is about to emit
2525 // for this stream. Without this the MadeYouReset lifetime
2526 // cap is evadable: any path that flips `parsing_phase` to
2527 // Error before reaching this gate (oversized inbound
2528 // trailers, malformed bodies, etc.) would land an
2529 // unaccounted RST on the wire. We defer the actual
2530 // accounting call until after `drop(converter)` — the
2531 // converter holds `&mut self.encoder` here.
2532 if freshly_rst {
2533 freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
2534 }
2535 }
2536 // Apply per-frontend response-side header edits
2537 // (set/replace/delete) stashed by the routing layer at
2538 // request time. H2 frontends always run as Server
2539 // position; the back-side H2 client (when sozu speaks
2540 // H2 to a backend) is a request emission and was
2541 // already mutated by Router::route_from_request.
2542 //
2543 // The snapshot is **drained** via `mem::take` so the
2544 // injection runs exactly once per response. Without
2545 // this, a re-entry of `write_streams` for the same
2546 // stream (multi-frame body, flow-control yield, or
2547 // RFC 9218 same-urgency round-robin) would re-call
2548 // `apply_response_header_edits` after `kawa.prepare`
2549 // had already consumed the `Block::Flags{end_header}`
2550 // anchor — the helper falls back to
2551 // `kawa.blocks.len()` and appends the edit AFTER all
2552 // remaining DATA blocks. The next prepare cycle then
2553 // encodes that orphan `Block::Header` into
2554 // `H2BlockConverter.out` with no closing
2555 // `Block::Flags{end_header}` to flush it as a HEADERS
2556 // frame, and `H2BlockConverter::finalize` trips the
2557 // "out buffer not empty (38 bytes remaining), clearing"
2558 // defense-in-depth log on every re-entry. 38 bytes is
2559 // the static-table HPACK encoding of a typical HSTS
2560 // header, which is how the symptom surfaces in
2561 // production once the listener-default HSTS reaches a
2562 // non-trivial share of frontends.
2563 if matches!(self.position, super::Position::Server)
2564 && !parts.context.headers_response.is_empty()
2565 {
2566 let edits = std::mem::take(&mut parts.context.headers_response);
2567 super::shared::apply_response_header_edits(kawa, &edits);
2568 }
2569 kawa.prepare(&mut converter);
2570 // The pre-prepare gate at line 2483 only inserts into
2571 // `rst_sent` when `kawa.is_error()` is already true on
2572 // entry. The HPACK over-budget abort path
2573 // (`H2BlockConverter::check_header_capacity` →
2574 // `finalize`) flips `parsing_phase` to Error AND pushes
2575 // its own RST_STREAM frame inside this same prepare
2576 // pass; without a post-prepare insert here the next
2577 // writable cycle would gate-pass and double-emit a
2578 // RST_STREAM via the existing `initialize` chokepoint.
2579 //
2580 // Per Codex P2: the converter's direct RST emission
2581 // bypasses the metric/flood accounting that
2582 // `Self::reset_stream` performs. Mirror it here so a
2583 // peer that drives oversized headers across many
2584 // streams cannot escape the MadeYouReset emitted-RST
2585 // lifetime cap and so dashboards see the per-error
2586 // counter and the global tx counter.
2587 //
2588 // Per Codex P3: when an incremental stream flips to
2589 // Error mid-prepare, the RFC 9218 §4 yield-after-one
2590 // accounting must drop this stream from the
2591 // same-urgency ready bucket so trailing peers see the
2592 // live count.
2593 let freshly_rst_post_prepare = kawa.is_error() && self.rst_sent.insert(stream_id);
2594 if freshly_rst_post_prepare {
2595 // Defer accounting until after `drop(converter)`; same
2596 // reason as the pre-prepare collector above.
2597 freshly_emitted_rsts.push(rst_error_from_kawa(kawa));
2598 if is_incremental {
2599 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2600 *c = c.saturating_sub(1);
2601 }
2602 }
2603 }
2604 let consumed = window - converter.window;
2605 *parts.window = parts.window.saturating_sub(consumed);
2606 self.flow_control.window = self.flow_control.window.saturating_sub(consumed);
2607 if is_incremental && consumed > 0 && first_incremental_fired.is_none() {
2608 first_incremental_fired = Some(stream_id);
2609 }
2610 }
2611 context.debug.push(DebugEvent::S(
2612 stream_id,
2613 global_stream_id,
2614 kawa.parsing_phase,
2615 kawa.blocks.len(),
2616 kawa.out.len(),
2617 ));
2618 let mut stream_bytes: usize = 0;
2619 let outcome = Self::flush_stream_out(
2620 &mut self.socket,
2621 kawa,
2622 parts.metrics,
2623 &self.position,
2624 &mut self.readiness,
2625 &mut context.debug,
2626 3,
2627 global_stream_id,
2628 Some(&mut socket_write),
2629 None,
2630 &mut io_slices,
2631 Some(&mut stream_bytes),
2632 );
2633 // Refresh the per-stream idle timer on outbound bytes. Without
2634 // this, a long-running response trickled at low bandwidth would
2635 // be killed by `cancel_timed_out_streams` mid-delivery — the
2636 // inbound-only refresh at h2.rs:3887-3895 / 4026-4031 never
2637 // fires while the peer is idle.
2638 if stream_bytes > 0 {
2639 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
2640 *t = Instant::now();
2641 }
2642 }
2643 total_bytes_written = total_bytes_written.saturating_add(stream_bytes);
2644 if outcome == FlushOutcome::Stalled {
2645 self.expect_write = Some(H2StreamId::Other {
2646 id: stream_id,
2647 gid: global_stream_id,
2648 });
2649 break 'outer;
2650 }
2651 self.expect_write = None;
2652 if (kawa.is_terminated() || kawa.is_error())
2653 && kawa.is_completed()
2654 && !Self::handle_1xx_reset(kawa, stream_state, &mut endpoint)
2655 {
2656 let close_frontend =
2657 matches!(self.position, Position::Server) && !parts.context.keep_alive_frontend;
2658 let (client_rtt, server_rtt) = Self::snapshot_rtts(
2659 &self.position,
2660 &self.socket,
2661 &endpoint,
2662 stream.linked_token(),
2663 );
2664
2665 if let Some((dead_id, token)) = Self::try_recycle_server_stream(
2666 &self.position,
2667 &mut self.bytes,
2668 &self.streams,
2669 stream,
2670 global_stream_id,
2671 stream_id,
2672 byte_totals,
2673 &mut context.debug,
2674 context.listener.clone(),
2675 client_rtt,
2676 server_rtt,
2677 ) {
2678 completed_streams.push((dead_id, global_stream_id, token, close_frontend));
2679 // LIFECYCLE §9 invariant 17: decrement INSIDE 'outer so
2680 // later iterations see the reduced count. The post-loop
2681 // retirement at remove_dead_stream is too late.
2682 if is_incremental {
2683 if let Some(c) = ready_incremental_by_urgency.get_mut(&urgency) {
2684 *c = c.saturating_sub(1);
2685 }
2686 }
2687 }
2688 }
2689 }
2690 gauge!(
2691 "h2.streams.ready_incremental.by_urgency",
2692 ready_incremental_by_urgency
2693 .values()
2694 .copied()
2695 .sum::<usize>()
2696 );
2697 // Reclaim the converter's reusable buffers before any &mut self calls,
2698 // since the converter borrows self.encoder.
2699 let converter_out = std::mem::take(&mut converter.out);
2700 let lowercase_buf = std::mem::take(&mut converter.lowercase_buf);
2701 let cookie_buf = std::mem::take(&mut converter.cookie_buf);
2702 // RFC 7541 §6.3: clear our mirror of the pending size-update only
2703 // AFTER the converter confirmed the signal was emitted to its
2704 // output buffer. A DATA-only pass leaves `size_update_emitted` as
2705 // `false` so the signal stays queued for the next pass with a
2706 // header block.
2707 let size_update_emitted = converter.size_update_emitted;
2708 drop(converter);
2709 if size_update_emitted {
2710 self.pending_table_size_update = None;
2711 }
2712 // Account every RST that the converter emitted during this pass
2713 // (pre-prepare gate + post-prepare HPACK over-budget abort) so
2714 // the global tx counter, the per-error breakdown, and the
2715 // MadeYouReset emitted-RST lifetime cap stay in step. If the
2716 // cap trips, propagate the GOAWAY result.
2717 for error in freshly_emitted_rsts {
2718 if let Some(result) = self.account_emitted_rst(error) {
2719 return result;
2720 }
2721 }
2722 self.converter_buf = converter_out;
2723 self.lowercase_buf = lowercase_buf;
2724 self.cookie_buf = cookie_buf;
2725 self.shrink_converter_buffers();
2726 // RFC 9218 §4: commit the round-robin cursor so the next writable
2727 // cycle begins with the stream immediately after the one we fired
2728 // first this pass.
2729 self.prioriser
2730 .advance_incremental_cursor(first_incremental_fired);
2731 let mut close_frontend_after_completed_stream = false;
2732 for (dead_id, global_stream_id, token, close_frontend) in completed_streams {
2733 // The main write loop borrows self.encoder, so we can't mutate the
2734 // H2 maps inline. Retire the recycled stream immediately after the
2735 // converter borrow ends, before endpoint.end_stream() can trigger
2736 // teardown and observe a stale `Recycle` entry in self.streams.
2737 self.remove_dead_stream(dead_id, global_stream_id);
2738 close_frontend_after_completed_stream |= close_frontend;
2739 if let Some(token) = token {
2740 remove_backend_stream(&mut context.backend_streams, token, global_stream_id);
2741 endpoint.end_stream(token, global_stream_id, context);
2742 }
2743 }
2744 if close_frontend_after_completed_stream && !self.drain.draining {
2745 return if self.streams.is_empty() {
2746 self.goaway(H2Error::NoError)
2747 } else {
2748 self.graceful_goaway()
2749 };
2750 }
2751 self.finalize_write(socket_write, total_bytes_written, context)
2752 }
2753
/// Write the bytes queued in `kawa.out` to `socket` using vectored writes.
///
/// Loops while `kawa.out` is non-empty. Each iteration gathers the `Store`
/// blocks up to the first `Delimiter` into `io_slices`, issues one
/// `socket_write_vectored`, then consumes the written byte count from
/// `kawa` and adds it to the outbound byte counters.
///
/// - `debug_site`: tag recorded in the `DebugEvent::SocketIO` entry so the
///   call site can be told apart in the debug history.
/// - `wrote`: when `Some`, set to `true` as soon as a write is attempted.
/// - `cross_read_amount`: when `Some(amount)`, `Ready::READABLE` is
///   re-inserted into `readiness.interest` once the storage has at least
///   `amount` bytes of available space after a write.
/// - `io_slices`: caller-owned scratch vector, cleared before and after
///   every write so it is empty on return.
/// - `bytes_written`: when `Some`, accumulates the bytes written
///   (saturating).
///
/// Returns [`FlushOutcome::Stalled`] when `update_readiness_after_write`
/// reports that the write could not proceed, and [`FlushOutcome::Drained`]
/// once `kawa.out` is empty.
#[allow(clippy::too_many_arguments)]
fn flush_stream_out(
    socket: &mut Front,
    kawa: &mut GenericHttpStream,
    metrics: &mut SessionMetrics,
    position: &Position,
    readiness: &mut Readiness,
    debug: &mut DebugHistory,
    debug_site: usize,
    global_stream_id: GlobalStreamId,
    mut wrote: Option<&mut bool>,
    cross_read_amount: Option<usize>,
    io_slices: &mut Vec<IoSlice<'static>>,
    mut bytes_written: Option<&mut usize>,
) -> FlushOutcome {
    while !kawa.out.is_empty() {
        // Flag the attempt before writing: the caller only needs to know
        // that the socket was exercised during this pass.
        if let Some(flag) = wrote.as_deref_mut() {
            *flag = true;
        }
        io_slices.clear();
        let buffer = kawa.storage.buffer();
        // Gather one batch: every `Store` block up to the next `Delimiter`.
        for block in kawa.out.iter() {
            match block {
                kawa::OutBlock::Delimiter => break,
                kawa::OutBlock::Store(store) => {
                    let data = store.data(buffer);
                    // SAFETY: the IoSlice references point into kawa's
                    // storage buffer. They are used only for the
                    // socket_write_vectored call below and cleared
                    // immediately after, before kawa.consume() which may
                    // relocate the buffer via ptr::copy (shift). No
                    // dangling 'static refs exist during consume().
                    let data: &'static [u8] =
                        unsafe { std::slice::from_raw_parts(data.as_ptr(), data.len()) };
                    io_slices.push(IoSlice::new(data));
                }
            }
        }
        let (size, status) = socket.socket_write_vectored(io_slices);
        // Drop the laundered `'static` slices before touching kawa again.
        io_slices.clear();
        debug_assert!(
            io_slices.is_empty(),
            "IoSlice refs must be cleared before consume"
        );
        debug.push(DebugEvent::SocketIO(debug_site, global_stream_id, size));
        kawa.consume(size);
        position.count_bytes_out_counter(size);
        position.count_bytes_out(metrics, size);
        if let Some(counter) = bytes_written.as_deref_mut() {
            *counter = counter.saturating_add(size);
        }
        if let Some(amount) = cross_read_amount {
            // Resume path: same stream is parked waiting for buffer space.
            // Re-enable READABLE once the write freed enough room.
            if kawa.storage.available_space() >= amount {
                readiness.interest.insert(Ready::READABLE);
            }
        }
        // A `true` result means the socket cannot take more right now;
        // leave the remaining bytes queued in `kawa.out` for a later pass.
        if update_readiness_after_write(size, status, readiness) {
            return FlushOutcome::Stalled;
        }
    }
    FlushOutcome::Drained
}
2822
2823 fn handle_1xx_reset<E: Endpoint>(
2824 kawa: &mut GenericHttpStream,
2825 stream_state: StreamState,
2826 endpoint: &mut E,
2827 ) -> bool {
2828 let is_1xx = matches!(
2829 kawa.detached.status_line,
2830 kawa::StatusLine::Response { code, .. } if (100..200).contains(&code)
2831 );
2832 if !is_1xx {
2833 return false;
2834 }
2835 debug!(
2836 "{} H2 write_streams: 1xx informational forwarded, resetting back buffer",
2837 log_module_context!()
2838 );
2839 kawa.clear();
2840 if let StreamState::Linked(token) = stream_state {
2841 let readiness = endpoint.readiness_mut(token);
2842 readiness.interest.insert(Ready::READABLE);
2843 readiness.signal_pending_read();
2844 }
2845 true
2846 }
2847
2848 /// Re-arm edge-triggered WRITABLE event if rustls still has buffered TLS data.
2849 fn ensure_tls_flushed(&mut self) {
2850 if self.socket.socket_wants_write() {
2851 self.readiness.signal_pending_write();
2852 }
2853 }
2854
2855 /// Evict every per-stream piece of state carried by this `ConnectionH2`.
2856 ///
2857 /// **Invariant**: `rst_sent`, `stream_last_activity_at` and `prioriser`
2858 /// MUST be emptied of `stream_id` here — they are the only three
2859 /// per-stream caches that are not stored in the slab-allocated
2860 /// `Context.streams[]`. Forgetting any of them causes unbounded memory
2861 /// growth on long-lived connections with many cancelled streams. The
2862 /// `debug_assert`s below fail loudly in test builds if someone adds a
2863 /// new per-stream cache without updating this function.
2864 fn remove_dead_stream(&mut self, stream_id: StreamId, global_stream_id: GlobalStreamId) {
2865 if self.streams.remove(&stream_id).is_none() {
2866 error!(
2867 "{} dead stream_id {} missing from streams map",
2868 log_context!(self),
2869 stream_id
2870 );
2871 }
2872 self.rst_sent.remove(&stream_id);
2873 self.stream_last_activity_at.remove(&stream_id);
2874 self.prioriser.remove(&stream_id);
2875 debug_assert!(
2876 !self.rst_sent.contains(&stream_id),
2877 "rst_sent still contains stream_id {stream_id} after eviction"
2878 );
2879 debug_assert!(
2880 !self.stream_last_activity_at.contains_key(&stream_id),
2881 "stream_last_activity_at still contains stream_id {stream_id} after eviction"
2882 );
2883 // Invariant: expect_write/expect_read must not reference a gid whose
2884 // context slot may be popped by shrink_trailing_recycle after eviction.
2885 if matches!(self.expect_write, Some(H2StreamId::Other { gid, .. }) if gid == global_stream_id)
2886 {
2887 self.expect_write = None;
2888 }
2889 if matches!(
2890 self.expect_read,
2891 Some((H2StreamId::Other { gid, .. }, _)) if gid == global_stream_id
2892 ) {
2893 self.expect_read = None;
2894 }
2895 }
2896
2897 /// Drop stream-id mappings for streams that never became active before a
2898 /// connection-level close. This happens on incomplete/oversized header
2899 /// blocks: the stream slot is created on the initial HEADERS frame, then a
2900 /// GOAWAY closes the connection before the request is fully materialized.
2901 fn prune_inactive_streams_while_closing<L>(&mut self, context: &mut Context<L>)
2902 where
2903 L: ListenerHandler + L7ListenerHandler,
2904 {
2905 if !self.drain.draining || !matches!(self.state, H2State::GoAway | H2State::Error) {
2906 return;
2907 }
2908
2909 let stale_streams = self
2910 .streams
2911 .iter()
2912 .filter_map(|(&stream_id, &global_stream_id)| {
2913 (!context.streams[global_stream_id].state.is_open())
2914 .then_some((stream_id, global_stream_id))
2915 })
2916 .collect::<Vec<_>>();
2917
2918 for (stream_id, global_stream_id) in stale_streams {
2919 let stream = &mut context.streams[global_stream_id];
2920 if stream.state == StreamState::Idle {
2921 stream.front.clear();
2922 stream.front.storage.clear();
2923 stream.back.clear();
2924 stream.back.storage.clear();
2925 stream.metrics.reset();
2926 stream.state = StreamState::Recycle;
2927 }
2928 self.remove_dead_stream(stream_id, global_stream_id);
2929 }
2930 }
2931
2932 /// Shrink reusable converter buffers when they grow beyond 16 KB to avoid
2933 /// holding memory after a burst of large headers.
2934 fn shrink_converter_buffers(&mut self) {
2935 if self.converter_buf.capacity() > 16_384 {
2936 self.converter_buf.shrink_to(4096);
2937 }
2938 if self.lowercase_buf.capacity() > 16_384 {
2939 self.lowercase_buf.shrink_to(4096);
2940 }
2941 if self.cookie_buf.capacity() > 16_384 {
2942 self.cookie_buf.shrink_to(4096);
2943 }
2944 }
2945
2946 /// Post-write phase: check drain completion, flush TLS, and update readiness.
2947 ///
2948 /// `bytes_written_this_pass` reports the total outbound bytes `write_streams`
2949 /// pushed to the socket (across every stream), and is used to distinguish
2950 /// two very different "no `expect_write`" states:
2951 ///
2952 /// - **Voluntary yield with progress**: at least one DATA/HEADERS frame
2953 /// emitted, but a stream left non-empty `back.out`/`back.blocks` because
2954 /// the converter yielded (e.g. RFC 9218 incremental rotation). LIFECYCLE
2955 /// §9 invariant 16: keep `Ready::WRITABLE` armed so the session loop can
2956 /// resume flushing on the next tick without waiting for an external
2957 /// wake-up that edge-triggered epoll will not deliver.
2958 /// - **No progress at all**: converter pushed every block back (e.g. flow
2959 /// window exhausted, no HEADERS ready yet). Strip `Ready::WRITABLE` —
2960 /// forward progress must come from an external trigger
2961 /// (`WINDOW_UPDATE`, new request), not from looping writable().
2962 ///
2963 /// Returns `MuxResult::Continue` in the normal case, or triggers a graceful
2964 /// GOAWAY when draining and all streams have completed.
2965 fn finalize_write<L>(
2966 &mut self,
2967 socket_write: bool,
2968 bytes_written_this_pass: usize,
2969 context: &mut Context<L>,
2970 ) -> MuxResult
2971 where
2972 L: ListenerHandler + L7ListenerHandler,
2973 {
2974 // RFC 9113 §6.8: if draining and all streams have completed,
2975 // send the final GOAWAY with the actual last_stream_id
2976 if self.drain.draining && self.streams.is_empty() {
2977 return self.graceful_goaway();
2978 }
2979
2980 if self.socket.socket_wants_write() {
2981 if !socket_write {
2982 self.socket.socket_write(&[]);
2983 }
2984 // Edge-triggered epoll: re-arm WRITABLE if rustls still has
2985 // pending encrypted data (first check triggers flush, second re-checks).
2986 self.ensure_tls_flushed();
2987 } else if self.expect_write.is_none() {
2988 // LIFECYCLE §9 invariant 16: retain `Ready::WRITABLE` when a
2989 // voluntary scheduler yield leaves stranded bytes in a stream's
2990 // `back.out`/`back.blocks` *after* the pass made forward
2991 // progress. Requiring progress avoids the degenerate no-progress
2992 // loop (e.g. flow-control-starved streams) that would otherwise
2993 // busy-spin against the session dispatcher.
2994 if bytes_written_this_pass > 0
2995 && any_stream_has_pending_back(&self.streams, &context.streams)
2996 {
2997 #[cfg(debug_assertions)]
2998 context.debug.push(DebugEvent::Str(
2999 "finalize_write: invariant 16 retained WRITABLE (pending back-buffer)"
3000 .to_owned(),
3001 ));
3002 } else if !self.pending_rst_streams.is_empty()
3003 || !self.flow_control.pending_window_updates.is_empty()
3004 {
3005 // Control-frame liveness: `flush_pending_control_frames` is
3006 // gated on `expect_write.is_none()`, so when a prior partial
3007 // write deferred the flush the RST / WINDOW_UPDATE queues
3008 // stay non-empty after `expect_write` finally drains. Without
3009 // this rearm the next tick would drop `Ready::WRITABLE` and
3010 // the queued RST would stall until an unrelated event
3011 // re-triggered writable — which is exactly the scenario
3012 // h2spec trips by sending back-to-back malformed streams.
3013 #[cfg(debug_assertions)]
3014 context.debug.push(DebugEvent::Str(
3015 "finalize_write: retained WRITABLE (control queue non-empty)".to_owned(),
3016 ));
3017 self.readiness.arm_writable();
3018 incr!(names::h2::SIGNAL_WRITABLE_REARMED_CONTROL_QUEUE);
3019 } else {
3020 // We wrote everything
3021 #[cfg(debug_assertions)]
3022 context.debug.push(DebugEvent::Str(format!(
3023 "Wrote everything: {:?}",
3024 self.streams
3025 )));
3026 self.readiness.interest.remove(Ready::WRITABLE);
3027 }
3028 }
3029 MuxResult::Continue
3030 }
3031
3032 /// Flush pending control frames (zero-buffer resume, WINDOW_UPDATEs, RST_STREAMs)
3033 /// before entering the main writable state machine.
3034 ///
3035 /// Returns `Some(result)` if the caller should return early (e.g. socket would
3036 /// block, GOAWAY triggered), or `None` if writable() should proceed normally.
3037 fn flush_pending_control_frames(&mut self) -> Option<MuxResult> {
3038 if self.frontend_hung_up_while_draining() {
3039 self.expect_write = None;
3040 self.zero.storage.clear();
3041 self.flow_control.pending_window_updates.clear();
3042 self.pending_rst_streams.clear();
3043 }
3044
3045 // RFC 9113 §6.5: check if peer has timed out on SETTINGS ACK
3046 if let Some(sent_at) = self.settings_sent_at {
3047 if sent_at.elapsed() >= SETTINGS_ACK_TIMEOUT {
3048 warn!(
3049 "{} SETTINGS ACK timeout: no SETTINGS ACK observed within {:?}",
3050 log_context!(self),
3051 SETTINGS_ACK_TIMEOUT
3052 );
3053 return Some(self.goaway(H2Error::SettingsTimeout));
3054 }
3055 }
3056
3057 // Stage — resume zero-buffer flush.
3058 // If a previous write was partial, finish it before serialising any
3059 // new control frames. Don't reset the timeout for control frame
3060 // writes (SETTINGS ACK, PING response, WINDOW_UPDATE) — only
3061 // application-data writes should reset it.
3062 if let Some(H2StreamId::Zero) = self.expect_write {
3063 if self.flush_zero_to_socket() {
3064 self.ensure_tls_flushed();
3065 return Some(MuxResult::Continue);
3066 }
3067 // When H2StreamId::Zero is used to write, READABLE is disabled —
3068 // re-enable it now that the flush is complete.
3069 self.readiness.interest.insert(Ready::READABLE);
3070 self.expect_write = None;
3071 }
3072
3073 // Stage — drain pending WINDOW_UPDATE frames.
3074 // Serialize and flush them inline to avoid extra event loop
3075 // iterations that could cause response data to be sent before
3076 // subsequent frames are validated.
3077 if !self.flow_control.pending_window_updates.is_empty() && self.expect_write.is_none() {
3078 let kawa = &mut self.zero;
3079 kawa.storage.clear();
3080 let buf = kawa.storage.space();
3081 let mut offset = 0;
3082 // Track which entries we successfully serialized so we can remove them.
3083 // Each WINDOW_UPDATE frame is 13 bytes (9-byte header + 4-byte payload).
3084 let mut written_ids = Vec::new();
3085 for (&stream_id, &increment) in &self.flow_control.pending_window_updates {
3086 if increment == 0 {
3087 written_ids.push(stream_id);
3088 continue;
3089 }
3090 match serializer::gen_window_update(&mut buf[offset..], stream_id, increment) {
3091 Ok((_, size)) => {
3092 offset += size;
3093 written_ids.push(stream_id);
3094 incr!(names::h2::FRAMES_TX_WINDOW_UPDATE);
3095 }
3096 Err(_) => {
3097 // Buffer full — stop here, remaining entries stay in the map
3098 break;
3099 }
3100 }
3101 }
3102 // Remove only the entries we successfully wrote (or skipped)
3103 for id in written_ids {
3104 self.flow_control.pending_window_updates.remove(&id);
3105 }
3106 if offset > 0 {
3107 kawa.storage.fill(offset);
3108 if self.flush_zero_to_socket() {
3109 self.expect_write = Some(H2StreamId::Zero);
3110 // Edge-triggered epoll: ensure pending TLS data gets flushed
3111 if self.socket.socket_wants_write() {
3112 self.readiness.event.insert(Ready::WRITABLE);
3113 }
3114 return Some(MuxResult::Continue);
3115 }
3116 }
3117 }
3118
3119 // Stage — RST_STREAM cap check + drain.
3120 // Check the lifetime total (not just pending queue length) because
3121 // writable() drains the queue between readable() calls, so the
3122 // pending count alone may never reach the cap even under sustained
3123 // misbehavior.
3124 if !matches!(self.state, H2State::GoAway | H2State::Error)
3125 && self.total_rst_streams_queued >= MAX_PENDING_RST_STREAMS
3126 {
3127 error!(
3128 "{} total RST_STREAM count {} exceeds cap {}, sending GOAWAY(ENHANCE_YOUR_CALM)",
3129 log_context!(self),
3130 self.total_rst_streams_queued,
3131 MAX_PENDING_RST_STREAMS
3132 );
3133 return Some(self.goaway(H2Error::EnhanceYourCalm));
3134 }
3135
3136 // Flush pending RST_STREAM frames (queued when refusing streams).
3137 // Accounting happens at queue-time inside `Self::enqueue_rst`, so
3138 // this drain only serialises and flushes — no metric/flood calls
3139 // here would double-count.
3140 if !self.pending_rst_streams.is_empty() && self.expect_write.is_none() {
3141 let kawa = &mut self.zero;
3142 kawa.storage.clear();
3143 let buf = kawa.storage.space();
3144 let mut offset = 0;
3145 let mut written_count = 0;
3146 for &(stream_id, ref error) in &self.pending_rst_streams {
3147 let frame_size =
3148 parser::FRAME_HEADER_SIZE + parser::RST_STREAM_PAYLOAD_SIZE as usize;
3149 if offset + frame_size > buf.len() {
3150 break;
3151 }
3152 match serializer::gen_rst_stream(&mut buf[offset..], stream_id, error.to_owned()) {
3153 Ok((_, _)) => {
3154 offset += frame_size;
3155 written_count += 1;
3156 }
3157 Err(_) => break,
3158 }
3159 }
3160 self.pending_rst_streams.drain(..written_count);
3161 if offset > 0 {
3162 kawa.storage.fill(offset);
3163 if self.flush_zero_to_socket() {
3164 self.expect_write = Some(H2StreamId::Zero);
3165 // Edge-triggered epoll: ensure pending TLS data gets flushed
3166 if self.socket.socket_wants_write() {
3167 self.readiness.event.insert(Ready::WRITABLE);
3168 }
3169 return Some(MuxResult::Continue);
3170 }
3171 }
3172 }
3173
3174 None
3175 }
3176
    /// Handle a writable event on this H2 connection.
    ///
    /// Steps, in order:
    /// 1. prune inactive streams while the connection is closing;
    /// 2. flush pending control frames — if that helper yields a result it is
    ///    returned immediately and nothing below runs;
    /// 3. push any output the socket layer still wants to write;
    /// 4. dispatch on `(state, position)` to the handshake, shutdown or
    ///    proxying branch.
    ///
    /// `context` and `endpoint` are only forwarded to the pruning helper and to
    /// `write_streams`; the handshake/shutdown branches do not touch them.
    pub fn writable<E, L>(&mut self, context: &mut Context<L>, endpoint: E) -> MuxResult
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        self.prune_inactive_streams_while_closing(context);

        // Control frames take precedence over everything else; a `Some` here
        // is the final answer for this writable pass.
        if let Some(result) = self.flush_pending_control_frames() {
            return result;
        }

        // Flush any pending TLS records before state-specific processing.
        // This ensures response DATA frames that were accepted by rustls
        // (via socket_write_vectored in write_streams) are pushed to the
        // TCP socket even when the connection is in GoAway or Error state.
        // Without this, the state-specific handlers may call force_disconnect()
        // before the response data reaches the kernel's TCP send buffer.
        if self.socket.socket_wants_write() {
            self.socket.socket_write(&[]);
        }

        match (&self.state, &self.position) {
            // Frontend-facing connection in Error state: keep the session alive
            // only while the socket still has output to push, then close it.
            (H2State::Error, Position::Server) => {
                if self.socket.socket_wants_write() {
                    self.ensure_tls_flushed();
                    MuxResult::Continue
                } else {
                    MuxResult::CloseSession
                }
            }
            // Combinations that are not expected on the writable path: log
            // them and tear the connection down.
            (H2State::Error, _)
            | (H2State::ClientSettings, Position::Server)
            | (H2State::ServerSettings, Position::Client(..)) => {
                error!(
                    "{} Unexpected combination: (Writable, {:?}, {:?})",
                    log_context!(self),
                    self.state,
                    self.position
                );
                self.force_disconnect()
            }
            // Server side still waiting for the client preface: nothing to write.
            (H2State::ClientPreface, Position::Server) => MuxResult::Continue,
            // Discard state: pending data (e.g. RST_STREAM) was already
            // written in the preamble above; let the readable path consume
            // the remaining frame payload.
            (H2State::Discard, _) => MuxResult::Continue,
            (H2State::GoAway, _) => {
                if self.peer_gone_after_final_goaway() {
                    return MuxResult::CloseSession;
                }
                // Flush any remaining TLS response data before disconnecting.
                // The GoAway state only enters after control frames (our GOAWAY
                // response) are flushed above, but response DATA frames may still
                // be in rustls's TLS output buffer — accepted by socket_write_vectored
                // during write_streams() but not yet flushed to TCP. Under TCP
                // backpressure (HAProxy chain), this is the primary truncation vector.
                if self.socket.socket_wants_write() {
                    self.socket.socket_write(&[]);
                    if self.socket.socket_wants_write() {
                        // TLS data still pending (TCP backpressure) — don't disconnect
                        // yet. Re-arm WRITABLE so the event loop retries the flush.
                        self.ensure_tls_flushed();
                        return MuxResult::Continue;
                    }
                }
                self.force_disconnect()
            }
            // Backend-facing connection, first writable pass: stage the client
            // preface followed by our SETTINGS frame in the zero buffer. The
            // actual socket write happens on a later pass via `expect_write`.
            (H2State::ClientPreface, Position::Client(..)) => {
                trace!("{} Preparing preface and settings", log_context!(self));
                let pri = serializer::H2_PRI.as_bytes();
                let kawa = &mut self.zero;

                kawa.storage.space()[0..pri.len()].copy_from_slice(pri);
                kawa.storage.fill(pri.len());
                match serializer::gen_settings(kawa.storage.space(), &self.local_settings) {
                    Ok((_, size)) => {
                        kawa.storage.fill(size);
                        incr!(names::h2::FRAMES_TX_SETTINGS);
                        // RFC 9113 §6.5: start tracking SETTINGS ACK timeout
                        self.settings_sent_at = Some(Instant::now());
                    }
                    Err(error) => {
                        error!(
                            "{} Could not serialize SettingsFrame: {:?}",
                            log_context!(self),
                            error
                        );
                        return self.force_disconnect();
                    }
                };

                self.state = H2State::ClientSettings;
                self.expect_write = Some(H2StreamId::Zero);
                MuxResult::Continue
            }
            // Preface + SETTINGS have been handed off: switch to waiting for
            // the server's SETTINGS. 9 is the size of an H2 frame header.
            (H2State::ClientSettings, Position::Client(..)) => {
                trace!("{} Sent preface and settings", log_context!(self));
                self.state = H2State::ServerSettings;
                self.expect_read = Some((H2StreamId::Zero, 9));
                self.readiness.interest.remove(Ready::WRITABLE);
                MuxResult::Continue
            }
            (H2State::ServerSettings, Position::Server) => {
                // Enlarge the connection-level receive window beyond the RFC default
                // of 65 535 bytes. That default is too small for high-throughput
                // proxying and causes excessive WINDOW_UPDATE round-trips. The
                // enlargement is expressed as an additive increment (configured
                // window minus the default) queued as a WINDOW_UPDATE on stream 0,
                // rather than as an unconditional assignment. Skip it when the
                // configured window does not exceed the default (the saturating
                // subtraction yields 0), since a zero-increment WINDOW_UPDATE
                // violates RFC 9113 §6.9.
                let increment = self
                    .connection_config
                    .initial_connection_window
                    .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
                if increment > 0 {
                    self.queue_window_update(0, increment);
                }
                // Do NOT increment flow_control.window here: sending our own
                // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
                // Our send window is only updated by WINDOW_UPDATEs we receive
                // from the peer (RFC 9113 §6.9).
                self.expect_header();
                // Keep WRITABLE so the queued WINDOW_UPDATE gets flushed.
                MuxResult::Continue
            }
            // Proxying states — writing application data (request/response).
            // The timeout is deliberately not reset at the top of writable(),
            // so that control frame writes (PING, WINDOW_UPDATE) don't reset it.
            // NOTE(review): no reset is visible in this function; presumably it
            // happens inside `write_streams` — confirm.
            (H2State::Header, _)
            | (H2State::Frame(_), _)
            | (H2State::ContinuationFrame(_), _)
            | (H2State::ContinuationHeader(_), _) => self.write_streams(context, endpoint),
        }
    }
3312
3313 /// Snapshot the access-log RTTs for the local frontend and the linked backend.
3314 ///
3315 /// `Position::Server`-only. On a backend H2 connection (`Position::Client`)
3316 /// the snapshot would write swapped values onto the shared `Stream.metrics`:
3317 /// the connection's `socket` is the upstream and the corresponding
3318 /// `EndpointServer::socket` returns the frontend, so the per-stream
3319 /// `client_rtt`/`server_rtt` cells would be populated with mislabelled
3320 /// values. Gating keeps backend H2 from poisoning the access-log metric
3321 /// for the matching frontend stream.
3322 ///
3323 /// Callers must invoke this BEFORE `endpoint.end_stream(...)` on reset
3324 /// paths so the backend lookup does not depend on
3325 /// `EndpointClient::end_stream` continuing to leave entries in
3326 /// `Router.backends`.
3327 ///
3328 /// Takes individual field references (not `&self`) for the same reason
3329 /// `try_recycle_server_stream` does — to avoid borrow conflicts with the
3330 /// `H2BlockConverter` that holds `&mut self.encoder` during the per-stream
3331 /// write loop.
3332 fn snapshot_rtts<E: Endpoint>(
3333 position: &Position,
3334 socket: &Front,
3335 endpoint: &E,
3336 linked_token: Option<mio::Token>,
3337 ) -> (Option<Duration>, Option<Duration>) {
3338 if !position.is_server() {
3339 return (None, None);
3340 }
3341 (
3342 socket_rtt(socket.socket_ref()),
3343 linked_token
3344 .and_then(|t| endpoint.socket(t))
3345 .and_then(socket_rtt),
3346 )
3347 }
3348
3349 /// Try to recycle a completed server-side stream by distributing overhead,
3350 /// generating access logs, and transitioning the stream to `Recycle` state.
3351 ///
3352 /// Returns `Some((stream_id, Option<token>))` if the stream was recycled, so the
3353 /// caller can add `stream_id` to the dead-streams list and call `endpoint.end_stream()`
3354 /// if a token was returned. Returns `None` if recycling was deferred or not applicable.
3355 ///
3356 /// Takes individual field references instead of `&mut self` to avoid borrow
3357 /// conflicts when the H2 block converter holds `&mut self.encoder`.
3358 /// `client_rtt`/`server_rtt` are snapshotted by the caller (which still
3359 /// owns `&self.socket` and `&endpoint`) and forwarded into the access log.
3360 #[allow(clippy::too_many_arguments)]
3361 fn try_recycle_server_stream<L>(
3362 position: &Position,
3363 bytes: &mut H2ByteAccounting,
3364 streams: &HashMap<StreamId, GlobalStreamId>,
3365 stream: &mut crate::protocol::mux::Stream,
3366 global_stream_id: GlobalStreamId,
3367 stream_id: StreamId,
3368 byte_totals: (usize, usize),
3369 debug: &mut DebugHistory,
3370 listener: std::rc::Rc<std::cell::RefCell<L>>,
3371 client_rtt: Option<Duration>,
3372 server_rtt: Option<Duration>,
3373 ) -> Option<(StreamId, Option<mio::Token>)>
3374 where
3375 L: ListenerHandler + L7ListenerHandler,
3376 {
3377 match position {
3378 Position::Client(..) => None,
3379 Position::Server => {
3380 // Already logged by a reset path; retire the stream after its RST is flushed.
3381 if stream.metrics.start.is_none() {
3382 let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3383 return match state {
3384 StreamState::Linked(token) => Some((stream_id, Some(token))),
3385 _ => Some((stream_id, None)),
3386 };
3387 }
3388
3389 // Don't recycle if the client hasn't sent END_STREAM yet —
3390 // more DATA frames may arrive for this stream.
3391 if !stream.front_received_end_of_stream {
3392 trace!(
3393 "{} Defer recycle stream {}: client still sending",
3394 log_module_context!(),
3395 global_stream_id
3396 );
3397 return None;
3398 }
3399 let stream_bytes = (
3400 stream.metrics.bin + stream.metrics.backend_bin,
3401 stream.metrics.bout + stream.metrics.backend_bout,
3402 );
3403 distribute_overhead(
3404 &mut stream.metrics,
3405 &mut bytes.overhead_bin,
3406 &mut bytes.overhead_bout,
3407 stream_bytes,
3408 byte_totals,
3409 streams.len(),
3410 streams.len() == 1,
3411 );
3412 debug.push(DebugEvent::StreamEvent(4, global_stream_id));
3413 trace!(
3414 "{} Recycle stream: {}",
3415 log_module_context!(),
3416 global_stream_id
3417 );
3418 let token = Self::complete_server_stream(stream, listener, client_rtt, server_rtt);
3419 Some((stream_id, token))
3420 }
3421 }
3422 }
3423
3424 /// Finalize a server-side stream after its response has been fully written.
3425 ///
3426 /// Generates an access log, resets metrics, and transitions the stream to `Recycle`.
3427 /// Returns the backend token if the stream was `Linked`, so the caller can call
3428 /// `endpoint.end_stream()` with the full `Context` (which can't be passed here
3429 /// because `stream` borrows from `context.streams`).
3430 ///
3431 /// Callers must distribute overhead *before* calling this, since the converter
3432 /// borrow may prevent `distribute_overhead()`.
3433 fn complete_server_stream<L>(
3434 stream: &mut crate::protocol::mux::Stream,
3435 listener: std::rc::Rc<std::cell::RefCell<L>>,
3436 client_rtt: Option<Duration>,
3437 server_rtt: Option<Duration>,
3438 ) -> Option<mio::Token>
3439 where
3440 L: ListenerHandler + L7ListenerHandler,
3441 {
3442 incr!(names::http::E2E_H2);
3443 stream.metrics.backend_stop();
3444 stream.generate_access_log(
3445 false,
3446 Some("H2::Complete"),
3447 listener,
3448 client_rtt,
3449 server_rtt,
3450 );
3451 stream.metrics.reset();
3452 let state = std::mem::replace(&mut stream.state, StreamState::Recycle);
3453 if let StreamState::Linked(token) = state {
3454 Some(token)
3455 } else {
3456 None
3457 }
3458 }
3459
3460 /// Compute the total bytes transferred across all active streams.
3461 ///
3462 /// Returns `(total_bytes_in, total_bytes_out)` where bytes_in = `bin + backend_bin`
3463 /// and bytes_out = `bout + backend_bout` for each stream.
3464 fn compute_stream_byte_totals<L: ListenerHandler + L7ListenerHandler>(
3465 &self,
3466 context: &Context<L>,
3467 ) -> (usize, usize) {
3468 let mut total_in = 0usize;
3469 let mut total_out = 0usize;
3470 for &gid in self.streams.values() {
3471 let m = &context.streams[gid].metrics;
3472 total_in += m.bin + m.backend_bin;
3473 total_out += m.bout + m.backend_bout;
3474 }
3475 (total_in, total_out)
3476 }
3477
3478 /// Distribute connection-level byte overhead proportionally to a single stream.
3479 ///
3480 /// `totals` should be pre-computed via [`compute_stream_byte_totals`] **before**
3481 /// taking a mutable borrow on the target stream, to avoid borrow conflicts.
3482 /// Delegates to the free function [`distribute_overhead`].
3483 fn distribute_overhead(&mut self, metrics: &mut SessionMetrics, totals: (usize, usize)) {
3484 let stream_bytes = (
3485 metrics.bin + metrics.backend_bin,
3486 metrics.bout + metrics.backend_bout,
3487 );
3488 distribute_overhead(
3489 metrics,
3490 &mut self.bytes.overhead_bin,
3491 &mut self.bytes.overhead_bout,
3492 stream_bytes,
3493 totals,
3494 self.streams.len(),
3495 self.streams.len() <= 1,
3496 );
3497 }
3498
3499 /// Attribute accumulated `zero_bytes_read` to the stream or to connection overhead.
3500 fn attribute_bytes_to_stream(&mut self, metrics: &mut SessionMetrics) {
3501 self.position
3502 .count_bytes_in(metrics, self.bytes.zero_bytes_read);
3503 self.bytes.zero_bytes_read = 0;
3504 }
3505
3506 fn attribute_bytes_to_overhead(&mut self) {
3507 self.bytes.overhead_bin += self.bytes.zero_bytes_read;
3508 self.bytes.zero_bytes_read = 0;
3509 }
3510
3511 /// Queue a WINDOW_UPDATE, coalescing with any existing entry for the same stream_id.
3512 /// RFC 9113 §6.9.1: window size increment MUST be 1..2^31-1 (0x7FFFFFFF).
3513 ///
3514 /// Always signals pending write so callers don't have to remember the
3515 /// edge-triggered epoll invariant (see memory feedback_epollet_signal_pending_write):
3516 /// under ET epoll a queued WINDOW_UPDATE without a live WRITABLE event bit
3517 /// is invisible to filter_interest() and will never get flushed.
3518 fn queue_window_update(&mut self, stream_id: u32, increment: u32) {
3519 let max_increment = i32::MAX as u32;
3520 if let Some(existing) = self.flow_control.pending_window_updates.get_mut(&stream_id) {
3521 let old = *existing;
3522 *existing = existing.saturating_add(increment).min(max_increment);
3523 trace!(
3524 "{} WINDOW_UPDATE coalesced: stream={} old={} new={}",
3525 log_context!(self),
3526 stream_id,
3527 old,
3528 *existing
3529 );
3530 } else if self.flow_control.pending_window_updates.len() < self.max_pending_window_updates {
3531 self.flow_control
3532 .pending_window_updates
3533 .insert(stream_id, increment.min(max_increment));
3534 trace!(
3535 "{} WINDOW_UPDATE queued: stream={} increment={}",
3536 log_context!(self),
3537 stream_id,
3538 increment.min(max_increment)
3539 );
3540 } else {
3541 error!(
3542 "{} WINDOW_UPDATE dropped: queue full ({} entries), stream={} increment={}",
3543 log_context!(self),
3544 self.max_pending_window_updates,
3545 stream_id,
3546 increment
3547 );
3548 incr!(names::h2::WINDOW_UPDATE_DROPPED);
3549 }
3550 self.readiness.arm_writable();
3551 }
3552
3553 /// Re-enable READABLE if this connection is parked waiting for buffer space
3554 /// and the target stream's buffer now has enough room.
3555 ///
3556 /// This is the cross-readiness counterpart to the same-connection check in
3557 /// `writable()`. When the *other side* of a stream (frontend or backend)
3558 /// drains data via its own `writable()`, it frees buffer space that this
3559 /// connection was waiting for. Without this explicit wake-up the connection
3560 /// stays parked and the session deadlocks until a timeout fires.
3561 ///
3562 /// Returns `true` if READABLE was re-enabled.
3563 pub fn try_resume_reading<L>(&mut self, context: &Context<L>) -> bool
3564 where
3565 L: ListenerHandler + L7ListenerHandler,
3566 {
3567 if let Some((
3568 H2StreamId::Other {
3569 gid: global_stream_id,
3570 ..
3571 },
3572 amount,
3573 )) = self.expect_read
3574 {
3575 let stream = &context.streams[global_stream_id];
3576 let kawa = match self.position {
3577 Position::Client(..) => &stream.back,
3578 Position::Server => &stream.front,
3579 };
3580 if kawa.storage.available_space() >= amount {
3581 self.readiness.interest.insert(Ready::READABLE);
3582 return true;
3583 }
3584 }
3585 false
3586 }
3587
3588 /// Mark a stream's position-appropriate end-of-stream flag.
3589 ///
3590 /// Server reads from the front (client), so sets `front_received_end_of_stream`.
3591 /// Client reads from the back (backend), so sets `back_received_end_of_stream`.
3592 fn mark_end_of_stream(&self, stream: &mut crate::protocol::mux::Stream) {
3593 if self.position.is_server() {
3594 stream.front_received_end_of_stream = true;
3595 } else {
3596 stream.back_received_end_of_stream = true;
3597 }
3598 }
3599
    /// Cancel streams that have been idle longer than [`Self::stream_idle_timeout`].
    ///
    /// A stream is considered idle when no meaningful application data (non-empty
    /// DATA frames or HEADERS) has been received since the last activity timestamp
    /// in [`Self::stream_last_activity_at`]. Streams that already have an RST
    /// recorded in `rst_sent`, or that are no longer in `self.streams`, are
    /// skipped.
    ///
    /// Mitigates slow-multiplex Slowloris (Pass 4 Medium #3): the connection-level
    /// idle timer resets on every frame, so a peer sending periodic control frames
    /// can pin `max_concurrent_streams` slots for the full nominal connection timeout.
    /// Per-stream idle deadlines guarantee each stream terminates if it stops making
    /// forward progress, regardless of connection-level liveness.
    ///
    /// Timed-out streams receive RST_STREAM(CANCEL) and are immediately removed
    /// from the streams map so they no longer count against MAX_CONCURRENT_STREAMS.
    /// Backend endpoints are notified and metrics are finalized.
    ///
    /// As a side job, this function also trims oversized per-connection scratch
    /// buffers on every call, before any early return.
    pub fn cancel_timed_out_streams<E, L>(&mut self, context: &mut Context<L>, endpoint: &mut E)
    where
        E: Endpoint,
        L: ListenerHandler + L7ListenerHandler,
    {
        // Per-connection scratch Vecs (`converter_buf`, `lowercase_buf`,
        // `cookie_buf`, `priorities_buf`) grow to a high-water mark and
        // never shrink on their own. On a long-lived idle H2 connection
        // that briefly carried a flurry of large headers, the backing
        // memory would stay pinned indefinitely. Each buffer is checked
        // independently and shrunk back to `SCRATCH_BUF_RETAIN` once its
        // capacity exceeds 4× that value. This runs at the top of every
        // `cancel_timed_out_streams` invocation — before the early returns
        // below, so it applies whether or not the connection has live
        // streams.
        const SCRATCH_BUF_RETAIN: usize = 16 * 1024;
        if self.converter_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.converter_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.lowercase_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.lowercase_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.cookie_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.cookie_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }
        if self.priorities_buf.capacity() > SCRATCH_BUF_RETAIN * 4 {
            self.priorities_buf.shrink_to(SCRATCH_BUF_RETAIN);
        }

        // Nothing to cancel without live streams or activity timestamps.
        if self.streams.is_empty() || self.stream_last_activity_at.is_empty() {
            return;
        }
        let now = Instant::now();
        let deadline = self.stream_idle_timeout;
        // Collect the ids first: the loop below mutates `self` (and removes
        // entries from the maps being scanned here).
        let timed_out: Vec<StreamId> = self
            .stream_last_activity_at
            .iter()
            .filter_map(|(&sid, &t)| {
                (self.streams.contains_key(&sid)
                    && !self.rst_sent.contains(&sid)
                    && now.saturating_duration_since(t) > deadline)
                    .then_some(sid)
            })
            .collect();
        if timed_out.is_empty() {
            return;
        }
        for sid in timed_out {
            info!(
                "{} H2 stream {} idle > {:?}, cancelling (slow-multiplex guard)",
                log_context!(self),
                sid,
                deadline
            );
            // Route through the canonical chokepoint so dedupe (rst_sent),
            // queued-cap accounting (MAX_PENDING_RST_STREAMS via
            // total_rst_streams_queued), and edge-triggered-epoll arming
            // (Readiness::arm_writable) all stay consistent — see LIFECYCLE
            // §8.2. An earlier version pushed directly onto the pending
            // queue and bypassed all three: a peer that opens 200 streams
            // and lets them all idle past stream_idle_timeout could push
            // past the queued cap silently (no GOAWAY(ENHANCE_YOUR_CALM)
            // escalation), a double-cancel pass would grow
            // pending_rst_streams instead of short-circuiting on the
            // existing rst_sent membership, and the hand-rolled
            // `interest.insert(WRITABLE) + signal_pending_write` pair that
            // used to follow the loop skipped invariant 15. Counting these
            // RSTs against the cap is a deliberate behaviour change: 200
            // cumulative idle cancellations from one peer IS abusive
            // (pinning MAX_CONCURRENT_STREAMS slots), and the
            // GOAWAY(ENHANCE_YOUR_CALM) escalation tells the peer to
            // reconnect with a clean state.
            //
            // We deliberately ignore the `Option<MuxResult>` flood-violation
            // signal here — `cancel_timed_out_streams` returns `()` and is
            // called as best-effort housekeeping during the read path. A
            // flood violation that becomes visible mid-iteration will be
            // re-detected on the next `record_rst_emitted` call (the
            // counter is sticky), so dropping the early-return is safe.
            let _ = self.enqueue_rst(sid, H2Error::Cancel);

            // Remove from streams map and recycle the context stream so the slot
            // no longer counts against MAX_CONCURRENT_STREAMS.
            // Compute totals per-stream before remove (matches RST_STREAM handler).
            // The totals are taken before `attribute_bytes_to_stream` below
            // modifies this stream's metrics.
            let byte_totals = self.compute_stream_byte_totals(context);
            if let Some(global_stream_id) = self.streams.get(&sid).copied() {
                {
                    // Scoped so the mutable borrow of `context.streams` ends
                    // before `context` is used again.
                    let stream = &mut context.streams[global_stream_id];
                    self.attribute_bytes_to_stream(&mut stream.metrics);
                }
                // Check if stream is linked to a backend — borrow must be scoped
                // so end_stream can take &mut context.
                let linked_token = context.streams[global_stream_id].linked_token();
                // RTTs are snapshotted before `end_stream`, while the backend
                // lookup for `linked_token` is still expected to succeed.
                let (client_rtt, server_rtt) =
                    Self::snapshot_rtts(&self.position, &self.socket, &*endpoint, linked_token);
                if let Some(token) = linked_token {
                    endpoint.end_stream(token, global_stream_id, context);
                }
                let stream = &mut context.streams[global_stream_id];
                match &self.position {
                    // Connected backend: release the in-flight request slot.
                    Position::Client(_, backend, BackendStatus::Connected) => {
                        let mut backend_borrow = backend.borrow_mut();
                        backend_borrow.active_requests =
                            backend_borrow.active_requests.saturating_sub(1);
                    }
                    // Backend in any other status: no request accounting to undo.
                    Position::Client(..) => {}
                    // Frontend side: finalize metrics, emit the access log
                    // tagged as an idle timeout, and mark the stream for recycling.
                    Position::Server => {
                        self.distribute_overhead(&mut stream.metrics, byte_totals);
                        stream.metrics.backend_stop();
                        stream.generate_access_log(
                            true,
                            Some("H2::IdleTimeout"),
                            context.listener.clone(),
                            client_rtt,
                            server_rtt,
                        );
                        stream.state = StreamState::Recycle;
                    }
                }
                // Retire sid from streams/prioriser/stream_last_activity_at and
                // invalidate expect_write/expect_read if they reference this gid.
                self.remove_dead_stream(sid, global_stream_id);
            }
        }
        // Writable arming is already done by enqueue_rst -> arm_writable in
        // the loop above, so no additional arming is needed here.
    }
3742
3743 /// Queue a `RST_STREAM` frame for serialisation by
3744 /// [`Self::flush_pending_control_frames`] on the next writable tick.
3745 ///
3746 /// This is the canonical entry point for proxy-emitted stream resets:
3747 /// `DATA` on a closed stream, `MAX_CONCURRENT_STREAMS` refusal, and the
3748 /// per-stream error paths in [`Self::reset_stream`] all funnel through
3749 /// here. Serialisation is independent of the owning `Stream` still
3750 /// existing in `self.streams`, which is what lets us emit even after a
3751 /// caller has already called [`Self::remove_dead_stream`].
3752 ///
3753 /// Delegates the primitive work to [`enqueue_rst_into`] so the invariants
3754 /// are covered by unit tests that don't need a full `ConnectionH2`
3755 /// fixture. See that function's doc-comment for the three invariants
3756 /// (dedupe via `rst_sent`, MadeYouReset queued cap via
3757 /// `total_rst_streams_queued`, edge-triggered-epoll arm via
3758 /// [`Readiness::arm_writable`]).
3759 fn enqueue_rst(&mut self, wire_stream_id: StreamId, error: H2Error) -> Option<MuxResult> {
3760 let freshly_queued = enqueue_rst_into(
3761 &mut self.pending_rst_streams,
3762 &mut self.total_rst_streams_queued,
3763 &mut self.rst_sent,
3764 &mut self.readiness,
3765 wire_stream_id,
3766 error,
3767 );
3768 // Account ONLY when a new RST actually entered the queue.
3769 // Calling `enqueue_rst` for a stream that already has a queued
3770 // (or already-flushed) RST is the dedup short-circuit — counting
3771 // those would inflate `h2.frames.tx.rst_stream` /
3772 // `h2.rst_stream.sent.*` and trip the CVE-2025-8671 MadeYouReset
3773 // lifetime cap on frames that never reached the wire.
3774 //
3775 // Account at queue-time, not at drain-time. Doing it later in
3776 // `flush_pending_control_frames` would double-count any RST that
3777 // a re-entrant call (DATA on a closed stream we already RSTed)
3778 // tried to enqueue — and missing it at queue-time leaves
3779 // `cancel_timed_out_streams` / `refuse_stream_and_discard` /
3780 // DATA-on-closed-stream paths bypassing the lifetime cap
3781 // (security review LISA-001 on commit `da845c71`).
3782 if freshly_queued {
3783 self.account_emitted_rst(error)
3784 } else {
3785 None
3786 }
3787 }
3788
3789 /// Single accounting site for proxy-emitted RST_STREAM frames.
3790 /// Three things must happen for every emitted RST so flood-protection
3791 /// stays honest: the global tx counter, the per-error breakdown,
3792 /// and the MadeYouReset emitted-RST lifetime cap.
3793 ///
3794 /// Two distinct emission paths feed this helper:
3795 /// * Queued frames — [`Self::enqueue_rst`] (and therefore every
3796 /// callable that funnels through it: `reset_stream`,
3797 /// `refuse_stream_and_discard`, `cancel_timed_out_streams`,
3798 /// DATA-on-closed-stream) calls this once at queue-time. The
3799 /// drain in `flush_pending_control_frames` does NOT call it
3800 /// again — that would double-count.
3801 /// * Converter-emitted frames — the converter's `initialize`
3802 /// chokepoint (and the HPACK over-budget abort path) writes
3803 /// RST_STREAM frames straight into `kawa.out` from inside
3804 /// `kawa.prepare`. We collect those `H2Error` codes during the
3805 /// `write_streams` loop and call this helper for each one
3806 /// after `drop(converter)` (because the converter holds
3807 /// `&mut self.encoder`).
3808 ///
3809 /// Returning `Some(MuxResult)` means the caller MUST short-circuit
3810 /// with that result — the flood detector tripped its lifetime cap
3811 /// and converted to a connection-wide GOAWAY.
3812 fn account_emitted_rst(&mut self, error: H2Error) -> Option<MuxResult> {
3813 incr!(names::h2::FRAMES_TX_RST_STREAM);
3814 count!(metric_for_rst_stream_sent(error), 1);
3815 if !matches!(error, H2Error::NoError) {
3816 if let Some(violation) = self.flood_detector.record_rst_emitted() {
3817 return Some(self.handle_flood_violation(violation));
3818 }
3819 }
3820 None
3821 }
3822
3823 /// Refuse a newly-opened stream with RST_STREAM and discard its HEADERS payload.
3824 ///
3825 /// Used when MAX_CONCURRENT_STREAMS is exceeded or buffer pool is exhausted.
3826 /// Queues the RST_STREAM for the writable path (can't write to kawa.storage
3827 /// here because it is needed to discard the HEADERS payload).
3828 ///
3829 /// Also applies SETTINGS back-pressure per RFC 9113 §5.1.2: if refusals
3830 /// burst past [`BACKPRESSURE_REFUSAL_THRESHOLD`] within
3831 /// [`BACKPRESSURE_WINDOW_DURATION`], the advertised
3832 /// `SETTINGS_MAX_CONCURRENT_STREAMS` is halved via
3833 /// [`Self::apply_mcs_backpressure`].
3834 fn refuse_stream_and_discard(
3835 &mut self,
3836 stream_id: StreamId,
3837 error: H2Error,
3838 payload_len: u32,
3839 ) -> MuxResult {
3840 if let Some(result) = self.enqueue_rst(stream_id, error) {
3841 return result;
3842 }
3843 self.state = H2State::Discard;
3844 self.expect_read = Some((H2StreamId::Zero, payload_len as usize));
3845 self.record_refusal_for_backpressure();
3846 MuxResult::Continue
3847 }
3848
3849 /// RFC 9113 §5.1.2 SETTINGS back-pressure bookkeeping.
3850 ///
3851 /// Increments the refusal counter for the current back-pressure window
3852 /// and, when the burst threshold is crossed, halves the advertised
3853 /// `SETTINGS_MAX_CONCURRENT_STREAMS`. Further halving attempts in the
3854 /// same connection are suppressed by [`Self::mcs_backpressure_applied`]
3855 /// so sustained abuse does not collapse the cap to zero — callers can
3856 /// still promote the situation to `EnhanceYourCalm` via the flood
3857 /// detector.
3858 fn record_refusal_for_backpressure(&mut self) {
3859 if self.refuse_window_start.elapsed() >= BACKPRESSURE_WINDOW_DURATION {
3860 self.refuse_count_window = 0;
3861 self.refuse_window_start = Instant::now();
3862 }
3863 self.refuse_count_window = self.refuse_count_window.saturating_add(1);
3864 if !self.mcs_backpressure_applied
3865 && self.refuse_count_window >= BACKPRESSURE_REFUSAL_THRESHOLD
3866 {
3867 self.apply_mcs_backpressure();
3868 }
3869 }
3870
3871 /// Halve the advertised `SETTINGS_MAX_CONCURRENT_STREAMS` and mark the
3872 /// back-pressure state as applied. The new value takes effect locally
3873 /// immediately — subsequent stream-open checks in `handle_header_state`
3874 /// compare `self.streams.len()` against this reduced cap, so the peer
3875 /// starts receiving `REFUSED_STREAM` earlier. A full SETTINGS re-send on
3876 /// the wire is deferred until we have a mid-connection SETTINGS queue
3877 /// (the existing path in `handle_preface_state` only fires during the
3878 /// handshake); this is noted in the task log as a minimal first step.
3879 fn apply_mcs_backpressure(&mut self) {
3880 let previous = self.local_settings.settings_max_concurrent_streams;
3881 let reduced = (previous / 2).max(1);
3882 warn!(
3883 "{} H2 SETTINGS back-pressure: refusals={} in {}s — halving \
3884 SETTINGS_MAX_CONCURRENT_STREAMS {} -> {}",
3885 log_context!(self),
3886 self.refuse_count_window,
3887 BACKPRESSURE_WINDOW_DURATION.as_secs(),
3888 previous,
3889 reduced,
3890 );
3891 self.local_settings.settings_max_concurrent_streams = reduced;
3892 self.mcs_backpressure_applied = true;
3893 }
3894
3895 /// Log a flood violation with full session context and emit the GOAWAY.
3896 ///
3897 /// Centralises the "flood detected" reporting so every site that observes a
3898 /// [`H2FloodViolation`] gets the same session-scoped log line, matching the
3899 /// RUSTLS log-context convention. Also emits the per-kind statsd counter
3900 /// (`h2.flood.violation.<kind>`) so SOC dashboards can window the trip
3901 /// rate without parsing logs — every CVE-mitigation in the H2 family
3902 /// (Rapid Reset, MadeYouReset, CONTINUATION/PING/SETTINGS floods, header
3903 /// overflow, glitch) funnels through this site.
3904 pub fn handle_flood_violation(&mut self, violation: H2FloodViolation) -> MuxResult {
3905 count!(violation.metric_key, 1);
3906 warn!(
3907 "{} H2 flood detected: {} count {} exceeds threshold {}",
3908 log_context!(self),
3909 violation.reason,
3910 violation.count,
3911 violation.threshold,
3912 );
3913 self.goaway(violation.error)
3914 }
3915}
3916
3917/// Recover the [`H2Error`] code that the converter's `initialize`
3918/// chokepoint will encode into the synthesised RST_STREAM frame for a
3919/// kawa stuck in [`kawa::ParsingPhase::Error`]. Mirrors the parse +
3920/// fallback at `lib/src/protocol/mux/converter.rs::initialize` so the
3921/// flood-accounting helper sees the same code that lands on the wire.
3922fn rst_error_from_kawa<T: kawa::AsBuffer>(kawa: &kawa::Kawa<T>) -> H2Error {
3923 match kawa.parsing_phase {
3924 kawa::ParsingPhase::Error {
3925 kind: kawa::ParsingErrorKind::Processing { message },
3926 ..
3927 } => message.parse::<H2Error>().unwrap_or(H2Error::InternalError),
3928 _ => H2Error::InternalError,
3929 }
3930}
3931
/// Compile-time mapping from `(prefix, H2Error)` to a static metric key.
///
/// Every key is a `&'static str` literal built with `concat!`, so it never
/// goes through a heap allocation and the statsd drain can store it as
/// `&'static str`. The generated `match` has no wildcard arm: adding a new
/// `H2Error` variant fails the build here, keeping the metric breakdown in
/// lock-step with RFC 9113 §7 codes.
///
/// Used for the per-error-code counters emitted around GOAWAY and RST_STREAM
/// in either direction (see `metric_for_goaway_sent` etc. below).
macro_rules! h2_error_metric_key {
    // Internal rule: expand a `Variant => ".suffix"` table into the match.
    (@table $prefix:literal, $error:expr, $($variant:ident => $suffix:literal),+ $(,)?) => {
        match $error {
            $(H2Error::$variant => concat!($prefix, $suffix),)+
        }
    };
    // Public entry point: `h2_error_metric_key!("some.prefix", error)`.
    ($prefix:literal, $error:expr) => {
        h2_error_metric_key!(
            @table $prefix, $error,
            NoError => ".no_error",
            ProtocolError => ".protocol_error",
            InternalError => ".internal_error",
            FlowControlError => ".flow_control_error",
            SettingsTimeout => ".settings_timeout",
            StreamClosed => ".stream_closed",
            FrameSizeError => ".frame_size_error",
            RefusedStream => ".refused_stream",
            Cancel => ".cancel",
            CompressionError => ".compression_error",
            ConnectError => ".connect_error",
            EnhanceYourCalm => ".enhance_your_calm",
            InadequateSecurity => ".inadequate_security",
            HTTP11Required => ".http_1_1_required",
        )
    };
}
3961
/// Static metric key for an outbound GOAWAY, selected by the [`H2Error`] we
/// put on the wire.
///
/// Expands `h2_error_metric_key!` with the `"h2.goaway.sent"` prefix, so the
/// result is always one of the `h2.goaway.sent.<code>` literals. Same call
/// shape as the other three helpers below — keeps the call sites uniform.
fn metric_for_goaway_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.goaway.sent", error)
}
3967
3968/// Static metric key for an inbound GOAWAY by raw wire error code. Codes
3969/// outside RFC 9113 §7 fall into the dedicated `…unknown_error` bucket so the
3970/// breakdown stays bounded and operators can still spot non-standard peers.
3971fn metric_for_goaway_received(error_code: u32) -> &'static str {
3972 H2Error::try_from(error_code)
3973 .map(|e| h2_error_metric_key!("h2.goaway.received", e))
3974 .unwrap_or("h2.goaway.received.unknown_error")
3975}
3976
/// Static metric key for an outbound RST_STREAM, selected by the
/// [`H2Error`] we put on the wire.
///
/// Mirrors [`metric_for_goaway_sent`] under a separate namespace
/// (`h2.rst_stream.sent.<code>`) so RST and GOAWAY rates can be alerted on
/// independently.
fn metric_for_rst_stream_sent(error: H2Error) -> &'static str {
    h2_error_metric_key!("h2.rst_stream.sent", error)
}
3983
3984/// Static metric key for an inbound RST_STREAM by raw wire error code. Same
3985/// `…unknown_error` fallback as [`metric_for_goaway_received`].
3986fn metric_for_rst_stream_received(error_code: u32) -> &'static str {
3987 H2Error::try_from(error_code)
3988 .map(|e| h2_error_metric_key!("h2.rst_stream.received", e))
3989 .unwrap_or("h2.rst_stream.received.unknown_error")
3990}
3991
/// Static metric key for an inbound H2 frame by RFC 9113 §6 frame type.
///
/// Emitted at the `handle_frame` dispatch — single chokepoint that any
/// new H2 frame type must traverse. The `match` deliberately has no
/// wildcard arm, so adding a `Frame::*` variant fails the build here.
/// Only the variant is inspected; the frame payload is ignored.
/// Counts are per-frame, not per-byte; pair with `bytes_in` for
/// traffic-mix dashboards.
fn h2_frame_rx_metric_key(frame: &Frame) -> &'static str {
    match frame {
        Frame::Data(_) => "h2.frames.rx.data",
        Frame::Headers(_) => "h2.frames.rx.headers",
        Frame::PushPromise(_) => "h2.frames.rx.push_promise",
        Frame::Priority(_) => "h2.frames.rx.priority",
        Frame::RstStream(_) => "h2.frames.rx.rst_stream",
        Frame::Settings(_) => "h2.frames.rx.settings",
        Frame::Ping(_) => "h2.frames.rx.ping",
        Frame::GoAway(_) => "h2.frames.rx.goaway",
        Frame::WindowUpdate(_) => "h2.frames.rx.window_update",
        Frame::Continuation(_) => "h2.frames.rx.continuation",
        // Extension frame types still get their own bucket rather than
        // being folded into another counter.
        Frame::PriorityUpdate(_) => "h2.frames.rx.priority_update",
        Frame::Unknown(_) => "h2.frames.rx.unknown",
    }
}
4013
4014impl<Front: SocketHandler> ConnectionH2<Front> {
/// Commit the connection to termination and queue a GOAWAY frame carrying
/// `error`.
///
/// Marks the connection as draining, drops any pending read expectation,
/// disarms the SETTINGS ACK timer and clears the zero buffer. It then
/// serializes the frame with `highest_peer_stream_id` as the last stream id.
///
/// On success the state becomes [`H2State::GoAway`], the read interest is
/// dropped (interest is reset to WRITABLE | HUP | ERROR), a pending write is
/// signalled and [`MuxResult::Continue`] is returned. If the frame cannot be
/// serialized, the failure is logged and the result of
/// [`Self::force_disconnect`] is returned; the state was set to
/// [`H2State::Error`] at the top of the function before that call.
pub fn goaway(&mut self, error: H2Error) -> MuxResult {
    self.state = H2State::Error;
    self.drain.draining = true;
    self.expect_read = None;
    // Disarm the SETTINGS ACK timer: once we've committed to GOAWAY, the
    // timeout check at `readable()` / `flush_pending_control_frames()` must
    // not re-fire. Without this, `signal_pending_write()` below re-enters
    // `writable()` → `flush_pending_control_frames()` on the next tick,
    // the elapsed check is still true, and we emit another
    // `warn!` + `goaway()` pair, each bumping `h2.goaway.sent.*`.
    self.settings_sent_at = None;
    let kawa = &mut self.zero;
    // Anything still sitting in the zero buffer is dropped; the GOAWAY frame
    // is the only thing queued there after this point.
    kawa.storage.clear();
    // Severity tiering: only `InternalError` implies a sozu-side bug when
    // WE emit it. Every other non-`NoError` reason is "peer misbehaved,
    // sozu defended correctly" — operators don't need paging on abusive
    // or buggy peers. Caller sites already log the specific antecedent
    // (flood detected, parser failure, SETTINGS timeout, invalid window)
    // before reaching `goaway()`, so demoting this summary line avoids
    // duplicate noise without hiding the root cause.
    match error {
        H2Error::NoError => debug!("{} GOAWAY: {:?}", log_context!(self), error),
        H2Error::InternalError => error!("{} GOAWAY: {:?}", log_context!(self), error),
        _ => warn!("{} GOAWAY: {:?}", log_context!(self), error),
    }
    // The per-code "sent" counter is bumped before serialization, so it is
    // also incremented on the (serialization failure) force-disconnect path.
    count!(metric_for_goaway_sent(error), 1);

    // RFC 9113 §6.8: last_stream_id is the highest peer-initiated stream we processed
    match serializer::gen_goaway(kawa.storage.space(), self.highest_peer_stream_id, error) {
        Ok((_, size)) => {
            kawa.storage.fill(size);
            incr!(names::h2::FRAMES_TX_GOAWAY);
            self.state = H2State::GoAway;
            self.expect_write = Some(H2StreamId::Zero);
            // Overwrites (not extends) the interest set: READABLE is gone
            // from here on.
            self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
            self.readiness.signal_pending_write();
            MuxResult::Continue
        }
        Err(error) => {
            error!(
                "{} Could not serialize GoAwayFrame: {:?}",
                log_context!(self),
                error
            );
            self.force_disconnect()
        }
    }
}
4063
/// RFC 9113 §6.8: Initiate graceful shutdown using the double-GOAWAY pattern.
///
/// First call sends GOAWAY with `last_stream_id = 0x7FFFFFFF` (MAX) to signal
/// the intent to stop accepting new streams while allowing in-flight streams
/// to complete. The connection enters draining mode.
///
/// When `draining` is already true (second invocation), sends the final GOAWAY
/// with the actual `highest_peer_stream_id` so the peer knows which streams
/// were processed.
///
/// Returns [`MuxResult::Continue`] when the initial GOAWAY was queued, the
/// result of [`Self::goaway`] when the connection was already draining, or
/// the result of [`Self::force_disconnect`] if the initial frame cannot be
/// serialized.
pub fn graceful_goaway(&mut self) -> MuxResult {
    if self.drain.draining {
        // Second GOAWAY: send with the real last_stream_id
        return self.goaway(H2Error::NoError);
    }

    // First GOAWAY: advertise MAX stream ID so the peer knows we are draining
    // but does not yet know the cutoff. This gives in-flight requests a chance
    // to arrive before we commit to a final last_stream_id.
    self.drain.draining = true;
    // Arm the forced-close timer from the moment the proxy decides to drain.
    // `Mux::shutting_down` samples it against `graceful_shutdown_deadline`
    // and returns `true` once the budget is exhausted so the session loop
    // tears the connection down instead of waiting forever.
    self.drain.started_at = Some(Instant::now());
    // Keep expect_read as-is: existing streams should continue reading
    // data during the drain window opened by the initial GOAWAY. Only
    // the final GOAWAY (via `goaway()`) removes READABLE.
    let kawa = &mut self.zero;
    // NOTE(review): this drops whatever is still queued in the zero buffer
    // while the connection keeps running — confirm no control frame can be
    // pending here.
    kawa.storage.clear();
    debug!(
        "{} GOAWAY (graceful, initial): last_stream_id=0x7FFFFFFF",
        log_context!(self)
    );
    // The initial GOAWAY sends NO_ERROR on the wire — count it under
    // the same per-code key as the final GOAWAY. The downstream alert
    // that wants to distinguish drain from termination compares
    // against the `h2.goaway.sent.no_error` rate (drain) vs the other
    // variants (termination on error).
    count!(metric_for_goaway_sent(H2Error::NoError), 1);

    match serializer::gen_goaway(kawa.storage.space(), STREAM_ID_MAX, H2Error::NoError) {
        Ok((_, size)) => {
            kawa.storage.fill(size);
            incr!(names::h2::FRAMES_TX_GOAWAY);
            // Stay in the current state so the connection can continue processing
            // existing streams. The final GOAWAY will transition to GoAway state.
            // Keep READABLE so in-flight request bodies can still be received
            // during the drain window. Only remove READABLE in the final GOAWAY
            // (via `goaway()`).
            self.expect_write = Some(H2StreamId::Zero);
            self.readiness.arm_writable();
            MuxResult::Continue
        }
        Err(error) => {
            error!(
                "{} Could not serialize graceful GoAwayFrame: {:?}",
                log_context!(self),
                error
            );
            self.force_disconnect()
        }
    }
}
4127
4128 /// Returns `true` when the graceful-shutdown budget armed by
4129 /// [`Self::graceful_goaway`] has elapsed. A return of `true` signals
4130 /// the enclosing session loop that the proxy-initiated drain must
4131 /// transition to a forced close: remaining streams will not complete
4132 /// in time and keeping the connection open past the deadline defeats
4133 /// the soft-stop SLA.
4134 ///
4135 /// Returns `false` when:
4136 /// - drain has not started yet (`started_at` is `None`),
4137 /// - the knob is `0` / `None` (indefinite wait explicitly opted in),
4138 /// - or the elapsed time is still within the configured budget.
4139 pub fn graceful_shutdown_deadline_elapsed(&self) -> bool {
4140 match (self.drain.started_at, self.drain.graceful_shutdown_deadline) {
4141 (Some(started_at), Some(deadline)) => started_at.elapsed() >= deadline,
4142 _ => false,
4143 }
4144 }
4145
4146 /// Returns `true` if there is data queued waiting to be flushed:
4147 /// - H2 control frames in the zero buffer (GOAWAY, SETTINGS ACK, etc.)
4148 /// - A partially-written stream or control frame (`expect_write`)
4149 /// - Encrypted TLS records in rustls's output buffer not yet flushed to TCP
4150 ///
4151 /// The TLS check is critical: `shutting_down()` uses this to prevent
4152 /// premature session close while response DATA is still in rustls's
4153 /// buffer (accepted by `socket_write_vectored` but not yet on the wire).
4154 ///
4155 /// Does NOT check per-stream `back.out`/`back.blocks`; use
4156 /// [`Self::has_pending_write_full`] on paths that must honour
4157 /// LIFECYCLE invariant 16 (e.g. shutdown-drain).
4158 pub fn has_pending_write(&self) -> bool {
4159 if self.peer_gone_after_final_goaway() {
4160 return false;
4161 }
4162 self.expect_write.is_some()
4163 || !self.zero.storage.is_empty()
4164 || self.socket.socket_wants_write()
4165 }
4166
4167 /// Connection-level [`Self::has_pending_write`] extended with a per-stream
4168 /// back-buffer probe (LIFECYCLE §9 invariant 16). Used by shutdown-drain
4169 /// paths that must not close while any open stream still has outbound
4170 /// kawa bytes queued — a voluntary scheduler yield can leave `back.out`
4171 /// or `back.blocks` non-empty without `expect_write` being set.
4172 pub fn has_pending_write_full<L>(&self, context: &Context<L>) -> bool
4173 where
4174 L: ListenerHandler + L7ListenerHandler,
4175 {
4176 self.has_pending_write() || any_stream_has_pending_back(&self.streams, &context.streams)
4177 }
4178
4179 /// Flush the zero buffer to the socket, counting bytes as connection overhead.
4180 ///
4181 /// Returns `true` if the socket stalled (WouldBlock / zero-length write),
4182 /// meaning the caller should stop writing and wait for the next writable event.
4183 /// Returns `false` when the buffer has been fully drained.
4184 fn flush_zero_to_socket(&mut self) -> bool {
4185 while !self.zero.storage.is_empty() {
4186 let (size, status) = self.socket.socket_write(self.zero.storage.data());
4187 #[cfg(debug_assertions)]
4188 trace!(
4189 "{} flush_zero_to_socket: written={}, status={:?}, wants_write={}",
4190 log_context!(self),
4191 size,
4192 status,
4193 self.socket.socket_wants_write()
4194 );
4195 self.zero.storage.consume(size);
4196 self.position.count_bytes_out_counter(size);
4197 self.bytes.overhead_bout += size;
4198 if update_readiness_after_write(size, status, &mut self.readiness) {
4199 return true;
4200 }
4201 }
4202 // Reset buffer positions after draining. consume() advances start but
4203 // never resets it, so without clear() the next fill would panic.
4204 self.zero.storage.clear();
4205 false
4206 }
4207
4208 /// Directly flush the zero buffer to the socket without going through
4209 /// the full writable() path. Used during shutdown when the event loop
4210 /// won't deliver new epoll events for this session (edge-triggered).
4211 pub fn flush_zero_buffer(&mut self) {
4212 if self.flush_zero_to_socket() {
4213 return;
4214 }
4215 self.expect_write = None;
4216 if self.socket.socket_wants_write() {
4217 let (_size, status) = self.socket.socket_write(&[]);
4218 let _ = update_readiness_after_write(0, status, &mut self.readiness);
4219 }
4220 }
4221
4222 pub fn create_stream<L>(
4223 &mut self,
4224 stream_id: StreamId,
4225 context: &mut Context<L>,
4226 ) -> Option<GlobalStreamId>
4227 where
4228 L: ListenerHandler + L7ListenerHandler,
4229 {
4230 // RFC 9113 §6.8: reject new streams on a draining connection
4231 if self.drain.draining {
4232 error!(
4233 "{} Rejecting new stream {} on draining connection",
4234 log_context!(self),
4235 stream_id
4236 );
4237 return None;
4238 }
4239 // Track the highest peer-initiated stream ID for GoAway frames
4240 // before any early return, so GoAway always reports the correct last stream.
4241 if stream_id > self.highest_peer_stream_id {
4242 self.highest_peer_stream_id = stream_id;
4243 }
4244 let global_stream_id = context.create_stream(
4245 Ulid::generate(),
4246 self.peer_settings.settings_initial_window_size,
4247 )?;
4248 self.last_stream_id = (stream_id + 2) & !1;
4249 self.streams.insert(stream_id, global_stream_id);
4250 self.stream_last_activity_at
4251 .insert(stream_id, Instant::now());
4252 Some(global_stream_id)
4253 }
4254
/// Issue the next locally-initiated stream id.
///
/// Delegates to `next_stream_id` with the current `last_stream_id` and
/// whether this connection is in the client position. On success the
/// returned `next` value is stored back into `last_stream_id` and the
/// `issued` id is handed to the caller. Returns `None` (leaving
/// `last_stream_id` untouched) when `next_stream_id` has no id to give.
pub fn new_stream_id(&mut self) -> Option<StreamId> {
    let (issued, next) = next_stream_id(self.last_stream_id, self.position.is_client())?;
    self.last_stream_id = next;
    Some(issued)
}
4260
/// Test-only setter: jump `last_stream_id` close to [`STREAM_ID_MAX`] so
/// that the next call to [`Self::new_stream_id`] exhausts the 31-bit
/// space. FIX-22 ("Stream-ID exhaustion disconnects backend gracefully")
/// exercises the `None`-return branch — reaching it through normal API
/// usage would require issuing ~2³¹ requests, which is not tractable in
/// an E2E harness.
///
/// Only compiled for unit tests or with the `e2e-hooks` feature. The value
/// is stored as-is, with no validation of parity or range.
#[cfg(any(test, feature = "e2e-hooks"))]
pub fn __test_set_last_stream_id(&mut self, id: StreamId) {
    self.last_stream_id = id;
}
4271
4272 fn handle_frame<E, L>(
4273 &mut self,
4274 frame: Frame,
4275 wire_payload_len: u32,
4276 context: &mut Context<L>,
4277 endpoint: E,
4278 ) -> MuxResult
4279 where
4280 E: Endpoint,
4281 L: ListenerHandler + L7ListenerHandler,
4282 {
4283 trace!("{} {:#?}", log_context!(self), frame);
4284 // Per-frame-type RX counter. Single chokepoint covers every H2 frame
4285 // type — adding a new `Frame::*` variant fails the build inside the
4286 // helper, keeping the metric breakdown in lock-step with RFC 9113 §6.
4287 count!(h2_frame_rx_metric_key(&frame), 1);
4288 match frame {
4289 Frame::Data(data) => self.handle_data_frame(data, wire_payload_len, context, endpoint),
4290 Frame::Headers(headers) => self.handle_headers_frame(headers, context, endpoint),
4291 Frame::PushPromise(_) => self.handle_push_promise_frame(),
4292 Frame::Priority(priority) => self.handle_priority_frame(priority, context, endpoint),
4293 Frame::RstStream(rst_stream) => {
4294 self.handle_rst_stream_frame(rst_stream, context, endpoint)
4295 }
4296 Frame::Settings(settings) => self.handle_settings_frame(settings, context),
4297 Frame::Ping(ping) => self.handle_ping_frame(ping),
4298 Frame::GoAway(goaway) => self.handle_goaway_frame(goaway, context, endpoint),
4299 Frame::WindowUpdate(wu) => self.handle_window_update_frame(wu, context, endpoint),
4300 Frame::PriorityUpdate(pu) => self.handle_priority_update_frame(pu),
4301 Frame::Continuation(_) => {
4302 // Unreachable: standalone CONTINUATION is rejected in
4303 // `handle_header_state` (RFC 9113 §6.10) and in-block
4304 // CONTINUATION is consumed by the inline header-parsing
4305 // path. Keep a defensive fallback that returns
4306 // PROTOCOL_ERROR rather than panicking in debug builds.
4307 self.attribute_bytes_to_overhead();
4308 warn!(
4309 "{} CONTINUATION frames are handled inline during header parsing",
4310 log_context!(self)
4311 );
4312 self.goaway(H2Error::ProtocolError)
4313 }
4314 // RFC 9113 §5.5: unknown frame types MUST be ignored and discarded.
4315 // The parser already consumed the payload; attribute the bytes
4316 // to connection-level overhead and continue.
4317 Frame::Unknown(raw) => {
4318 debug!(
4319 "{} Ignoring unknown H2 frame type {}",
4320 log_context!(self),
4321 raw
4322 );
4323 self.attribute_bytes_to_overhead();
4324 MuxResult::Continue
4325 }
4326 }
4327 }
4328
4329 /// RFC 9110 §8.6: Content-Length validation must be skipped for responses
4330 /// where the body is absent by definition:
4331 /// - Responses to HEAD requests (any status)
4332 /// - 1xx informational responses
4333 /// - 204 No Content
4334 /// - 304 Not Modified
4335 fn content_length_exempt(
4336 &self,
4337 context: &crate::protocol::kawa_h1::editor::HttpContext,
4338 ) -> bool {
4339 use crate::protocol::kawa_h1::parser::Method;
4340 // HEAD method responses (only relevant when reading backend responses)
4341 if self.position.is_client() && context.method == Some(Method::Head) {
4342 return true;
4343 }
4344 // 1xx, 204, 304 status codes
4345 if let Some(status) = context.status {
4346 if (100..200).contains(&status) || status == 204 || status == 304 {
4347 return true;
4348 }
4349 }
4350 false
4351 }
4352
/// Process one inbound DATA frame.
///
/// `wire_payload_len` is the full frame payload length on the wire
/// (pad-length byte and padding included) and drives all flow-control
/// accounting. `data.payload` is the unpadded content: it is what gets
/// counted against Content-Length and pushed into the stream's read buffer.
///
/// Steps, in order:
/// 1. count empty, non-END_STREAM frames for the flood detector;
/// 2. if the stream id is unknown, credit connection-level flow control,
///    attribute the bytes to overhead and drop the frame;
/// 3. add the content length to the per-direction `data_received` counter
///    and credit the connection window;
/// 4. reset the stream with PROTOCOL_ERROR when the running total exceeds
///    the declared Content-Length (unless exempt);
/// 5. queue a stream-level WINDOW_UPDATE (unless END_STREAM) and arm
///    writable if any update is pending;
/// 6. discard the payload for an unlinked stream, or push it as kawa
///    blocks — on END_STREAM the total must equal the declared length,
///    otherwise the stream is reset;
/// 7. for a linked stream, arm writable on the peer endpoint's readiness.
///
/// Returns [`MuxResult::Continue`] except where `check_flood_or_return!` or
/// `reset_stream` dictate the result.
fn handle_data_frame<E, L>(
    &mut self,
    data: parser::Data,
    wire_payload_len: u32,
    context: &mut Context<L>,
    mut endpoint: E,
) -> MuxResult
where
    E: Endpoint,
    L: ListenerHandler + L7ListenerHandler,
{
    // CVE-2019-9518: track empty DATA frames (no payload, no END_STREAM)
    if data.payload.is_empty() && !data.end_stream {
        self.flood_detector.empty_data_count += 1;
        check_flood_or_return!(self);
    }
    let Some(global_stream_id) = self.streams.get(&data.stream_id).copied() else {
        // The stream was terminated while data was expected,
        // probably due to automatic answer for invalid/unauthorized access.
        // RFC 9113 §6.9: we MUST still account for the DATA payload in
        // connection-level flow control using the full wire length
        // (including pad-length byte and padding), otherwise the window
        // shrinks permanently and eventually stalls the connection.
        self.flow_control.received_bytes_since_update += wire_payload_len;
        let conn_threshold = self.connection_config.initial_connection_window / 2;
        if self.flow_control.received_bytes_since_update >= conn_threshold {
            let increment = self.flow_control.received_bytes_since_update;
            self.queue_window_update(0, increment);
            self.flow_control.received_bytes_since_update = 0;
            self.readiness.arm_writable();
        }
        self.attribute_bytes_to_overhead();
        return MuxResult::Continue;
    };
    let mut slice = data.payload;
    let stream = &mut context.streams[global_stream_id];
    // Unpadded application payload size — what is forwarded to the backend
    // and counted against Content-Length.
    let content_len = slice.len();
    // Full wire-payload size (includes pad-length byte and padding).
    // RFC 9113 §5.2: padding counts against flow-control windows.
    let wire_len = wire_payload_len as usize;
    let cl_exempt = self.content_length_exempt(&stream.context);

    // Extract declared content-length and update position-aware data counter
    let (data_received, declared_length) = {
        let parts = stream.split(&self.position);
        *parts.data_received += content_len;
        let total = *parts.data_received;
        let declared = match parts.rbuffer.body_size {
            kawa::BodySize::Length(n) => Some(n),
            _ => None,
        };
        (total, declared)
    };

    // RFC 9113 §6.9 + §5.2: credit connection-level flow control BEFORE any
    // early-return path. Malformed DATA still consumed the peer's send
    // window; without crediting it back, repeated bad streams permanently
    // shrink the connection window and stall unrelated streams that share
    // the same H2 connection. Stream-level credit can stay below — once we
    // RST the violating stream, its per-stream window is moot per
    // RFC 9113 §6.9 (the receiver discards further frames for the stream).
    let conn_threshold = self.connection_config.initial_connection_window / 2;
    self.flow_control.received_bytes_since_update += wire_payload_len;
    if self.flow_control.received_bytes_since_update >= conn_threshold {
        let increment = self.flow_control.received_bytes_since_update;
        self.queue_window_update(0, increment);
        self.flow_control.received_bytes_since_update = 0;
    }

    // RFC 9113 §8.1.1: if Content-Length is present, total DATA payload
    // must not exceed the declared length (check on every frame).
    // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses (body absent by definition).
    if !cl_exempt {
        if let Some(expected) = declared_length {
            if data_received > expected {
                error!(
                    "{} Content-Length mismatch: received {} > declared {}",
                    log_context!(self),
                    data_received,
                    expected
                );
                // Pair WRITABLE arming with the queued connection-level
                // WINDOW_UPDATE before returning; otherwise the credit sits
                // until the next inbound frame on this connection.
                if !self.flow_control.pending_window_updates.is_empty() {
                    self.readiness.arm_writable();
                }
                let result = self.reset_stream(
                    data.stream_id,
                    global_stream_id,
                    context,
                    endpoint,
                    H2Error::ProtocolError,
                );
                self.remove_dead_stream(data.stream_id, global_stream_id);
                return result;
            }
        }
    }

    // Re-borrow the stream: the earlier borrow ended before the reset path
    // above needed `context` as a whole.
    let stream = &mut context.streams[global_stream_id];
    self.attribute_bytes_to_stream(&mut stream.metrics);
    let stream_state = stream.state;
    let is_unlinked = matches!(stream_state, StreamState::Unlinked);
    let parts = stream.split(&self.position);
    let kawa = parts.rbuffer;
    self.position.count_bytes_in(parts.metrics, content_len);

    // Stream-level flow control (only if stream is still open).
    // Connection-level credit was already applied above the CL check so
    // malformed DATA cannot starve the connection window for other streams.
    if !data.end_stream {
        self.queue_window_update(data.stream_id, wire_payload_len);
    }

    // If we have pending updates, ensure we get a writable event.
    // Must use signal_pending_write() — not just interest.insert() — because
    // under edge-triggered epoll the WRITABLE event bit may have been consumed
    // by a previous write cycle. Without the event bit set, filter_interest()
    // returns 0 and the WINDOW_UPDATEs never get flushed, stalling the client.
    // NOTE(review): the code calls `arm_writable()`, not
    // `signal_pending_write()` — presumably `arm_writable()` both inserts the
    // interest and signals the pending write; confirm against its definition.
    if !self.flow_control.pending_window_updates.is_empty() {
        self.readiness.arm_writable();
    }

    // Refresh per-stream idle timer on non-empty DATA.
    // Empty DATA frames (CVE-2019-9518 vector) must NOT reset the timer,
    // otherwise an attacker can keep a stream alive indefinitely with
    // zero-length frames while pinning a MAX_CONCURRENT_STREAMS slot.
    if content_len > 0 {
        if let Some(t) = self.stream_last_activity_at.get_mut(&data.stream_id) {
            *t = Instant::now();
        }
    }

    if is_unlinked {
        // Backend is gone but client is still sending DATA.
        // Discard the data (flow control updates were already
        // queued above) to prevent the buffer from filling up.
        kawa.storage.clear();
        if data.end_stream {
            kawa.parsing_phase = kawa::ParsingPhase::Terminated;
            self.mark_end_of_stream(stream);
        }
    } else {
        // Advance storage.head by the full wire payload length so the
        // next frame doesn't read stale pad-length+padding bytes.
        slice.start = slice.start.saturating_add(kawa.storage.head as u32);
        kawa.storage.head += wire_len;

        // Emit chunk framing for chunked transfer encoding (H2→H1 path).
        // H2 converter ignores ChunkHeader and end_chunk Flags, so this is safe for H2→H2.
        if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
            // Chunk size line is the payload length in lowercase hex.
            let hex_len = {
                let mut buf = Vec::with_capacity(16);
                let _ = write!(buf, "{content_len:x}");
                buf
            };
            kawa.push_block(kawa::Block::ChunkHeader(kawa::ChunkHeader {
                length: kawa::Store::from_vec(hex_len),
            }));
        }

        kawa.push_block(kawa::Block::Chunk(kawa::Chunk {
            data: kawa::Store::Slice(slice),
        }));

        if kawa.body_size == kawa::BodySize::Chunked && content_len > 0 {
            kawa.push_block(kawa::Block::Flags(kawa::Flags {
                end_body: false,
                end_chunk: true,
                end_header: false,
                end_stream: false,
            }));
        }

        if data.end_stream {
            // RFC 9113 §8.1.1: on end_stream, total DATA must equal Content-Length.
            // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
            if !cl_exempt {
                if let Some(expected) = declared_length {
                    if data_received != expected {
                        error!(
                            "{} Content-Length mismatch: received {} != declared {}",
                            log_context!(self),
                            data_received,
                            expected
                        );
                        let result = self.reset_stream(
                            data.stream_id,
                            global_stream_id,
                            context,
                            endpoint,
                            H2Error::ProtocolError,
                        );
                        self.remove_dead_stream(data.stream_id, global_stream_id);
                        return result;
                    }
                }
            }
            let is_chunked = kawa.body_size == kawa::BodySize::Chunked;
            kawa.push_block(kawa::Block::Flags(kawa::Flags {
                end_body: true,
                end_chunk: is_chunked,
                end_header: false,
                end_stream: true,
            }));
            kawa.parsing_phase = kawa::ParsingPhase::Terminated;
            self.mark_end_of_stream(stream);
        }
        if let StreamState::Linked(token) = stream_state {
            // Mirror of h1.rs:361-368 for the H2-backend → H2-frontend
            // path: edge-triggered epoll will NOT re-fire for bytes we
            // just pushed into stream.back; the synthetic event is the
            // only wake path. LIFECYCLE invariant 15.
            endpoint.readiness_mut(token).arm_writable();
            incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_DATA);
        }
    }
    MuxResult::Continue
}
4575
4576 fn handle_headers_frame<E, L>(
4577 &mut self,
4578 headers: Headers,
4579 context: &mut Context<L>,
4580 mut endpoint: E,
4581 ) -> MuxResult
4582 where
4583 E: Endpoint,
4584 L: ListenerHandler + L7ListenerHandler,
4585 {
4586 // HEADERS frames represent real application activity (new request
4587 // or response). Reset the timeout since the peer is actively
4588 // communicating, unlike control frames (PING, WINDOW_UPDATE).
4589 self.timeout_container.reset();
4590 if !headers.end_headers {
4591 // CVE-2024-27316: only initialize tracking on the very first HEADERS
4592 // fragment, not on re-entries from ContinuationFrame (which call
4593 // handle_frame(Frame::Headers) with the accumulated header block).
4594 if self.flood_detector.continuation_count == 0 {
4595 self.flood_detector.accumulated_header_size = headers.header_block_fragment.len;
4596 }
4597 debug!(
4598 "{} FRAGMENT: stream_id={}, len={}",
4599 log_context!(self),
4600 headers.stream_id,
4601 self.zero.storage.data().len()
4602 );
4603 self.state = H2State::ContinuationHeader(headers);
4604 return MuxResult::Continue;
4605 }
4606 // Header block is complete — reset CONTINUATION counters
4607 self.flood_detector.reset_continuation();
4608 // can this fail?
4609 let stream_id = headers.stream_id;
4610 let Some(global_stream_id) = self.streams.get(&stream_id).copied() else {
4611 error!(
4612 "{} Handling Headers frame with no attached stream {:#?}",
4613 log_context!(self),
4614 self
4615 );
4616 incr!(names::h2::HEADERS_NO_STREAM_ERROR);
4617 self.attribute_bytes_to_overhead();
4618 return self.force_disconnect();
4619 };
4620
4621 // Refresh per-stream idle timer on HEADERS (response headers or trailers
4622 // on an existing stream). Initial HEADERS that create the stream already
4623 // set the timestamp in create_stream().
4624 if let Some(t) = self.stream_last_activity_at.get_mut(&stream_id) {
4625 *t = Instant::now();
4626 }
4627
4628 if let Some(priority) = &headers.priority {
4629 if self.prioriser.push_priority(stream_id, priority.clone()) {
4630 self.reset_stream(
4631 stream_id,
4632 global_stream_id,
4633 context,
4634 endpoint,
4635 H2Error::ProtocolError,
4636 );
4637 self.remove_dead_stream(stream_id, global_stream_id);
4638 return MuxResult::Continue;
4639 }
4640 }
4641
4642 let stream = &mut context.streams[global_stream_id];
4643 self.attribute_bytes_to_stream(&mut stream.metrics);
4644 let kawa = &mut self.zero;
4645 let buffer = headers.header_block_fragment.data(kawa.storage.buffer());
4646 let stream = &mut context.streams[global_stream_id];
4647 let parts = &mut stream.split(&self.position);
4648 let was_initial = parts.rbuffer.is_initial();
4649 let elide_x_real_ip = parts.context.elide_x_real_ip;
4650 let status = pkawa::handle_header(
4651 &mut self.decoder,
4652 &mut self.prioriser,
4653 stream_id,
4654 parts.rbuffer,
4655 buffer,
4656 headers.end_stream,
4657 parts.context,
4658 self.flood_detector.config.max_header_list_size,
4659 elide_x_real_ip,
4660 );
4661 kawa.storage.clear();
4662 if let Err((error, global)) = status {
4663 match self.position {
4664 Position::Client(..) => incr!(names::http::BACKEND_PARSE_ERRORS),
4665 Position::Server => incr!(names::http::FRONTEND_PARSE_ERRORS),
4666 }
4667 if global {
4668 error!(
4669 "{} GOT GLOBAL ERROR WHILE PROCESSING HEADERS",
4670 log_context!(self)
4671 );
4672 return self.goaway(error);
4673 } else {
4674 let result =
4675 self.reset_stream(stream_id, global_stream_id, context, endpoint, error);
4676 self.remove_dead_stream(stream_id, global_stream_id);
4677 return result;
4678 }
4679 }
4680 if headers.end_stream {
4681 // RFC 9113 §8.1.1: when END_STREAM arrives via trailers,
4682 // validate that total DATA received matches Content-Length.
4683 // RFC 9110 §8.6: skip for HEAD/1xx/204/304 responses.
4684 if !was_initial && !self.content_length_exempt(&stream.context) {
4685 let parts = stream.split(&self.position);
4686 if let kawa::BodySize::Length(expected) = parts.rbuffer.body_size {
4687 if *parts.data_received != expected {
4688 error!(
4689 "{} Content-Length mismatch on trailers: received {} != declared {}",
4690 log_context!(self),
4691 *parts.data_received,
4692 expected
4693 );
4694 let result = self.reset_stream(
4695 stream_id,
4696 global_stream_id,
4697 context,
4698 endpoint,
4699 H2Error::ProtocolError,
4700 );
4701 self.remove_dead_stream(stream_id, global_stream_id);
4702 return result;
4703 }
4704 }
4705 }
4706 self.mark_end_of_stream(stream);
4707 }
4708 if let StreamState::Linked(token) = stream.state {
4709 // Mirror of handle_data_frame's rearm. LIFECYCLE invariant 15.
4710 endpoint.readiness_mut(token).arm_writable();
4711 incr!(names::h2::SIGNAL_WRITABLE_REARMED_PEER_HEADERS);
4712 }
4713 // was_initial prevents trailers from triggering connection
4714 if was_initial && self.position.is_server() {
4715 incr!(names::http::REQUESTS);
4716 gauge_add!(names::http::ACTIVE_REQUESTS, 1);
4717 stream.metrics.service_start();
4718 stream.request_counted = true;
4719 stream.state = StreamState::Link;
4720 context.pending_links.push_back(global_stream_id);
4721 }
4722 MuxResult::Continue
4723 }
4724
4725 fn handle_push_promise_frame(&mut self) -> MuxResult {
4726 self.attribute_bytes_to_overhead();
4727 match self.position {
4728 Position::Client(..) => {
4729 // RFC 9113 §8.4: Server push is deprecated. Sozu never sends
4730 // SETTINGS_ENABLE_PUSH=1, so receiving PUSH_PROMISE is a protocol error.
4731 error!(
4732 "{} Received PUSH_PROMISE but server push is not supported",
4733 log_context!(self)
4734 );
4735 self.goaway(H2Error::ProtocolError)
4736 }
4737 Position::Server => {
4738 // Clients must never send PUSH_PROMISE (RFC 9113 §8.4)
4739 error!("{} Received PUSH_PROMISE from client", log_context!(self));
4740 self.goaway(H2Error::ProtocolError)
4741 }
4742 }
4743 }
4744
4745 fn handle_priority_frame<E, L>(
4746 &mut self,
4747 priority: parser::Priority,
4748 context: &mut Context<L>,
4749 endpoint: E,
4750 ) -> MuxResult
4751 where
4752 E: Endpoint,
4753 L: ListenerHandler + L7ListenerHandler,
4754 {
4755 if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
4756 let stream = &mut context.streams[global_stream_id];
4757 self.attribute_bytes_to_stream(&mut stream.metrics);
4758 } else {
4759 self.attribute_bytes_to_overhead();
4760 }
4761 // Pass 3 Medium #4: standalone PRIORITY frames can arrive for any
4762 // peer-chosen stream ID. Accept only currently-open streams and a
4763 // small idle look-ahead window; everything else is dropped before
4764 // it can feed memory into the priority map.
4765 if self.prioriser.push_priority_guarded(
4766 priority.stream_id,
4767 priority.inner,
4768 self.last_stream_id,
4769 &self.streams,
4770 ) {
4771 if let Some(global_stream_id) = self.streams.get(&priority.stream_id).copied() {
4772 let result = self.reset_stream(
4773 priority.stream_id,
4774 global_stream_id,
4775 context,
4776 endpoint,
4777 H2Error::ProtocolError,
4778 );
4779 self.remove_dead_stream(priority.stream_id, global_stream_id);
4780 return result;
4781 } else {
4782 error!(
4783 "{} INVALID PRIORITY RECEIVED ON INVALID STREAM",
4784 log_context!(self)
4785 );
4786 return self.goaway(H2Error::ProtocolError);
4787 }
4788 }
4789 MuxResult::Continue
4790 }
4791
4792 /// RFC 9218 §7.1: PRIORITY_UPDATE reprioritizes an open or idle-soon
4793 /// stream at the connection level. Decodes the priority field value
4794 /// (same grammar as the `priority` request header, `parse_rfc9218_priority`)
4795 /// and pushes it into the `Prioriser` through the same guarded path used
4796 /// for standalone PRIORITY frames — the guard bounds memory against a
4797 /// client spamming PRIORITY_UPDATE for far-future stream IDs.
4798 ///
4799 /// Prioritized stream ID `0` is a connection-level `PROTOCOL_ERROR`
4800 /// (RFC 9218 §7.1). For any other ID that is not currently open or
4801 /// within the idle look-ahead budget, the update is silently dropped
4802 /// (matches the PRIORITY-frame guard semantics — no state change).
4803 fn handle_priority_update_frame(&mut self, pu: parser::PriorityUpdate) -> MuxResult {
4804 self.attribute_bytes_to_overhead();
4805 if pu.prioritized_stream_id == 0 {
4806 error!(
4807 "{} PRIORITY_UPDATE with prioritized_stream_id=0 (RFC 9218 §7.1)",
4808 log_context!(self)
4809 );
4810 return self.goaway(H2Error::ProtocolError);
4811 }
4812 let (urgency, incremental) = pkawa::parse_rfc9218_priority(&pu.priority_field_value);
4813 let (prev_urgency, _) = self.prioriser.get(&pu.prioritized_stream_id);
4814 trace!(
4815 "{} PRIORITY_UPDATE stream={} urgency={}->{} incremental={} rearmed_writable=true",
4816 log_context!(self),
4817 pu.prioritized_stream_id,
4818 prev_urgency,
4819 urgency,
4820 incremental
4821 );
4822 let _ = self.prioriser.push_priority_guarded(
4823 pu.prioritized_stream_id,
4824 parser::PriorityPart::Rfc9218 {
4825 urgency,
4826 incremental,
4827 },
4828 self.last_stream_id,
4829 &self.streams,
4830 );
4831 // LIFECYCLE invariant 15: reprioritisation only changes ordering for
4832 // the NEXT write pass. Under ET epoll, if finalize_write already
4833 // stripped WRITABLE, the scheduler won't re-run without a synthetic
4834 // wake — pair the interest insert with signal_pending_write.
4835 self.readiness.arm_writable();
4836 incr!(names::h2::SIGNAL_WRITABLE_REARMED_PRIORITY_UPDATE);
4837 MuxResult::Continue
4838 }
4839
4840 fn handle_rst_stream_frame<E, L>(
4841 &mut self,
4842 rst_stream: parser::RstStream,
4843 context: &mut Context<L>,
4844 mut endpoint: E,
4845 ) -> MuxResult
4846 where
4847 E: Endpoint,
4848 L: ListenerHandler + L7ListenerHandler,
4849 {
4850 // Per-error-code counter for the inbound RST. Emitted before the
4851 // flood-detector trip check so even a connection that gets terminated
4852 // by `handle_flood_violation` shows up in the per-code breakdown
4853 // (the dedicated `h2.flood.violation.rst_stream_*` series tracks the
4854 // mitigation event itself).
4855 count!(metric_for_rst_stream_received(rst_stream.error_code), 1);
4856 // CVE-2023-44487 Rapid Reset + CVE-2019-9514: track RST_STREAM rate.
4857 self.flood_detector.rst_stream_count += 1;
4858 check_flood_or_return!(self);
4859 // Additional CVE-2023-44487 mitigation: lifetime cap on RST_STREAM
4860 // frames received. The per-window counter above half-decays, so a
4861 // patient client can keep ~50 RST/s forever; a never-decaying
4862 // lifetime counter puts an absolute ceiling on that amplification.
4863 // Streams whose backend response has not yet started count toward a
4864 // much lower "abusive" ceiling — this is the signature Rapid Reset
4865 // pattern where the attacker pays one RST frame and we pay a
4866 // backend round-trip for each.
4867 //
4868 // "Response started" here means the Server has begun producing
4869 // response bytes (backend kawa buffer past its initial phase). For
4870 // the Client position the concept does not apply symmetrically
4871 // (RSTs received from the backend are rare and benign), so we
4872 // conservatively flag them as abusive too — lifetime cap still
4873 // dominates in practice.
4874 let response_started = match self.streams.get(&rst_stream.stream_id) {
4875 Some(global_stream_id) => {
4876 let stream = &context.streams[*global_stream_id];
4877 !stream.back.is_initial()
4878 }
4879 // Stream already gone (e.g. closed, not yet registered) —
4880 // treat as response-started to avoid over-counting benign
4881 // races as abusive.
4882 None => true,
4883 };
4884 if let Some(violation) = self.flood_detector.record_rst_lifetime(response_started) {
4885 return self.handle_flood_violation(violation);
4886 }
4887 // Rapid Reset signature (CVE-2023-44487): a RST that arrives before the
4888 // backend has begun answering. Emitted alongside the per-code counter
4889 // so the SOC can alert on the rate of pre-response RSTs without
4890 // having to differentiate by error code.
4891 if !response_started {
4892 count!(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START, 1);
4893 }
4894 debug!(
4895 "{} RstStream({} -> {})",
4896 log_context!(self),
4897 rst_stream.error_code,
4898 H2Error::try_from(rst_stream.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str())
4899 );
4900 // Compute totals before removing the stream from the map,
4901 // so the removed stream's bytes are included in the total.
4902 let rst_byte_totals = self.compute_stream_byte_totals(context);
4903 if let Some(global_stream_id) = self.streams.get(&rst_stream.stream_id).copied() {
4904 let stream = &mut context.streams[global_stream_id];
4905 self.attribute_bytes_to_stream(&mut stream.metrics);
4906 let linked_token = stream.linked_token();
4907 let (client_rtt, server_rtt) =
4908 Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
4909 if let Some(token) = linked_token {
4910 endpoint.end_stream(token, global_stream_id, context);
4911 }
4912 let stream = &mut context.streams[global_stream_id];
4913 match &self.position {
4914 // Inbound RST_STREAM on the backend side terminates the in-flight
4915 // request without going through Connection::end_stream (the normal
4916 // place where Backend.active_requests is decremented), so do the
4917 // bookkeeping explicitly here to avoid leaking load counters.
4918 Position::Client(_, backend, BackendStatus::Connected) => {
4919 let mut backend_borrow = backend.borrow_mut();
4920 backend_borrow.active_requests =
4921 backend_borrow.active_requests.saturating_sub(1);
4922 }
4923 Position::Client(..) => {}
4924 Position::Server => {
4925 self.distribute_overhead(&mut stream.metrics, rst_byte_totals);
4926 // This is a special case, normally, all stream are terminated by the server
4927 // when the last byte of the response is written. Here, the reset is requested
4928 // on the server endpoint and immediately terminates, shortcutting the other path
4929 stream.metrics.backend_stop();
4930 stream.generate_access_log(
4931 true,
4932 Some("H2::ResetFrame"),
4933 context.listener.clone(),
4934 client_rtt,
4935 server_rtt,
4936 );
4937 stream.state = StreamState::Recycle;
4938 }
4939 }
4940 // Retire from streams/prioriser/stream_last_activity_at and
4941 // invalidate expect_write/expect_read if they reference this gid.
4942 self.remove_dead_stream(rst_stream.stream_id, global_stream_id);
4943 } else {
4944 self.attribute_bytes_to_overhead();
4945 }
4946 MuxResult::Continue
4947 }
4948
4949 fn handle_settings_frame<L>(
4950 &mut self,
4951 settings: parser::Settings,
4952 context: &mut Context<L>,
4953 ) -> MuxResult
4954 where
4955 L: ListenerHandler + L7ListenerHandler,
4956 {
4957 if settings.ack {
4958 // RFC 9113 §6.5: SETTINGS ACK must have empty payload
4959 if !settings.settings.is_empty() {
4960 error!("{} SETTINGS ACK with non-empty payload", log_context!(self));
4961 return self.goaway(H2Error::FrameSizeError);
4962 }
4963 // RFC 9113 §6.5: peer acknowledged our SETTINGS — clear timeout
4964 self.settings_sent_at = None;
4965 // RFC 7541 §4.2: sync the decoder's max allowed table size with
4966 // what we advertised. Currently a no-op (settings don't change at
4967 // runtime), but guards against future runtime SETTINGS updates.
4968 self.decoder.set_max_allowed_table_size(
4969 self.local_settings.settings_header_table_size as usize,
4970 );
4971 self.attribute_bytes_to_overhead();
4972 return MuxResult::Continue;
4973 }
4974 // CVE-2019-9515: track SETTINGS frame rate
4975 self.flood_detector.settings_count += 1;
4976 self.flood_detector.total_settings_received_lifetime = self
4977 .flood_detector
4978 .total_settings_received_lifetime
4979 .saturating_add(1);
4980 check_flood_or_return!(self);
4981 for setting in settings.settings {
4982 let v = setting.value;
4983 let mut is_error = false;
4984 #[rustfmt::skip]
4985 match setting.identifier {
4986 parser::SETTINGS_HEADER_TABLE_SIZE => {
4987 // Cap to the configured maximum — a malicious peer can
4988 // advertise up to 4 GB to inflate HPACK encoder memory.
4989 let cap = self.flood_detector.config.max_header_table_size;
4990 let capped = v.min(cap);
4991 self.peer_settings.settings_header_table_size = capped;
4992 self.encoder.set_max_table_size(capped as usize);
4993 // RFC 7541 §4.2 / §6.3: queue a dynamic-table-size-update
4994 // HPACK directive for the next header block we emit.
4995 // Without it, the peer's decoder keeps its previous (possibly
4996 // larger) table cap and our encoder-side change is silent
4997 // — conformance suites (h2spec `hpack/4.2`) will flag it.
4998 self.pending_table_size_update = Some(capped);
4999 },
5000 parser::SETTINGS_ENABLE_PUSH => { self.peer_settings.settings_enable_push = v == 1; is_error |= v > 1 },
5001 parser::SETTINGS_MAX_CONCURRENT_STREAMS => { self.peer_settings.settings_max_concurrent_streams = v },
5002 parser::SETTINGS_INITIAL_WINDOW_SIZE => { is_error |= self.update_initial_window_size(v, context) },
5003 parser::SETTINGS_MAX_FRAME_SIZE => { self.peer_settings.settings_max_frame_size = v; is_error |= !(MIN_MAX_FRAME_SIZE..MAX_MAX_FRAME_SIZE).contains(&v) },
5004 parser::SETTINGS_MAX_HEADER_LIST_SIZE => { self.peer_settings.settings_max_header_list_size = v },
5005 parser::SETTINGS_ENABLE_CONNECT_PROTOCOL => { self.peer_settings.settings_enable_connect_protocol = v == 1; is_error |= v > 1 },
5006 parser::SETTINGS_NO_RFC7540_PRIORITIES => { self.peer_settings.settings_no_rfc7540_priorities = v == 1; is_error |= v > 1 },
5007 other => { warn!("Unknown setting_id: {}, we MUST ignore this", other); self.flood_detector.glitch_count += 1 },
5008 };
5009 if is_error {
5010 error!("{} INVALID SETTING", log_context!(self));
5011 return self.goaway(H2Error::ProtocolError);
5012 }
5013 }
5014
5015 self.attribute_bytes_to_overhead();
5016
5017 // Enlarge the connection-level receive window for backend H2
5018 // connections (Position::Client). The server side does this in
5019 // the ServerSettings writable path, but the client needs to do
5020 // it here after receiving the server's initial SETTINGS.
5021 if self.position.is_client()
5022 && self.flow_control.window <= DEFAULT_INITIAL_WINDOW_SIZE as i32
5023 {
5024 let increment = self
5025 .connection_config
5026 .initial_connection_window
5027 .saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
5028 if increment > 0 {
5029 self.queue_window_update(0, increment);
5030 }
5031 // Do NOT increment flow_control.window here: sending our own
5032 // WINDOW_UPDATE enlarges the peer's send allowance, not ours.
5033 // Our send window is only updated by WINDOW_UPDATEs we receive
5034 // from the peer (RFC 9113 §6.9).
5035 }
5036
5037 let kawa = &mut self.zero;
5038 let ack = &serializer::SETTINGS_ACKNOWLEDGEMENT;
5039 let buf = kawa.storage.space();
5040 if buf.len() < ack.len() {
5041 error!(
5042 "{} No space in zero buffer for SETTINGS ACK ({} available, {} needed)",
5043 log_context!(self),
5044 buf.len(),
5045 ack.len()
5046 );
5047 return self.force_disconnect();
5048 }
5049 buf[..ack.len()].copy_from_slice(ack);
5050 kawa.storage.fill(ack.len());
5051
5052 self.readiness.interest.insert(Ready::WRITABLE);
5053 self.readiness.interest.remove(Ready::READABLE);
5054 self.expect_write = Some(H2StreamId::Zero);
5055 self.readiness.signal_pending_write();
5056 MuxResult::Continue
5057 }
5058
5059 fn handle_ping_frame(&mut self, ping: parser::Ping) -> MuxResult {
5060 if ping.ack {
5061 self.attribute_bytes_to_overhead();
5062 return MuxResult::Continue;
5063 }
5064 // CVE-2019-9512: track non-ACK PING frame rate
5065 self.flood_detector.ping_count += 1;
5066 self.flood_detector.total_ping_received_lifetime = self
5067 .flood_detector
5068 .total_ping_received_lifetime
5069 .saturating_add(1);
5070 check_flood_or_return!(self);
5071 self.attribute_bytes_to_overhead();
5072 let kawa = &mut self.zero;
5073 let ping_response_size = serializer::PING_ACKNOWLEDGEMENT_HEADER.len() + 8;
5074 if kawa.storage.space().len() < ping_response_size {
5075 error!(
5076 "{} No space in zero buffer for PING response ({} available, {} needed)",
5077 log_context!(self),
5078 kawa.storage.space().len(),
5079 ping_response_size
5080 );
5081 return self.force_disconnect();
5082 }
5083 match serializer::gen_ping_acknowledgement(kawa.storage.space(), &ping.payload) {
5084 Ok((_, size)) => {
5085 kawa.storage.fill(size);
5086 incr!(names::h2::FRAMES_TX_PING_ACK);
5087 }
5088 Err(error) => {
5089 error!(
5090 "{} Could not serialize PingFrame: {:?}",
5091 log_context!(self),
5092 error
5093 );
5094 return self.force_disconnect();
5095 }
5096 };
5097 self.readiness.interest.insert(Ready::WRITABLE);
5098 self.readiness.interest.remove(Ready::READABLE);
5099 self.expect_write = Some(H2StreamId::Zero);
5100 self.readiness.signal_pending_write();
5101 MuxResult::Continue
5102 }
5103
5104 fn handle_goaway_frame<E, L>(
5105 &mut self,
5106 goaway: parser::GoAway,
5107 context: &mut Context<L>,
5108 mut endpoint: E,
5109 ) -> MuxResult
5110 where
5111 E: Endpoint,
5112 L: ListenerHandler + L7ListenerHandler,
5113 {
5114 self.attribute_bytes_to_overhead();
5115 let error_name =
5116 H2Error::try_from(goaway.error_code).map_or("UNKNOWN_ERROR", |e| e.as_str());
5117 if goaway.error_code == H2Error::NoError as u32 {
5118 debug!(
5119 "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5120 log_context!(self),
5121 goaway.last_stream_id,
5122 error_name,
5123 goaway.additional_debug_data
5124 );
5125 } else {
5126 // Peer-originated failure: no variant of H2Error from a peer
5127 // implies a sozu bug. Impact handling is separate (retry above
5128 // `last_stream_id`, RST_STREAM for consumed streams) and logs
5129 // its own details below, so the summary drops to `warn!`.
5130 warn!(
5131 "{} Received GOAWAY: last_stream_id={}, error={}, debug_data={:?}",
5132 log_context!(self),
5133 goaway.last_stream_id,
5134 error_name,
5135 goaway.additional_debug_data
5136 );
5137 }
5138 count!(metric_for_goaway_received(goaway.error_code), 1);
5139 // RFC 9113 §6.8: begin graceful drain.
5140 self.drain.draining = true;
5141 self.drain.peer_last_stream_id = Some(goaway.last_stream_id);
5142
5143 // Streams with ID > last_stream_id were NOT processed by the peer.
5144 // Mark them for retry (StreamState::Link) so they can be retried
5145 // on a new connection.
5146 // IMPORTANT: do NOT call endpoint.end_stream() here — that would
5147 // remove the stream from the frontend's H2 stream map and send
5148 // RST_STREAM to the client, killing the request instead of retrying it.
5149 let mut retry_streams = Vec::new();
5150 for (&stream_id, &global_stream_id) in &self.streams {
5151 if stream_id > goaway.last_stream_id {
5152 retry_streams.push((stream_id, global_stream_id));
5153 }
5154 }
5155 for (stream_id, global_stream_id) in &retry_streams {
5156 // Remove from reverse index before transitioning away from Linked.
5157 if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5158 remove_backend_stream(&mut context.backend_streams, token, *global_stream_id);
5159 }
5160 let stream = &mut context.streams[*global_stream_id];
5161 if stream.front.consumed {
5162 // Request was already sent to this backend — we can't
5163 // replay it. Use the linked token's readiness (via endpoint)
5164 // so the RST_STREAM reaches the client.
5165 debug!(
5166 "{} GOAWAY: stream {} already consumed, cannot retry",
5167 log_context!(self),
5168 stream_id
5169 );
5170 if let StreamState::Linked(token) = stream.state {
5171 let front_readiness = endpoint.readiness_mut(token);
5172 forcefully_terminate_answer(stream, front_readiness, H2Error::RefusedStream);
5173 } else {
5174 warn!(
5175 "{} GOAWAY: stream {} consumed but not Linked, cannot notify frontend",
5176 log_context!(self),
5177 stream_id
5178 );
5179 }
5180 } else {
5181 stream.state = StreamState::Link;
5182 context.pending_links.push_back(*global_stream_id);
5183 }
5184 // Both retry (!consumed) and terminated (consumed) paths remove the
5185 // stream from self.streams without going through Connection::end_stream,
5186 // so decrement Backend.active_requests here to keep load metrics honest.
5187 if let Position::Client(_, backend, BackendStatus::Connected) = &self.position {
5188 let mut backend_borrow = backend.borrow_mut();
5189 backend_borrow.active_requests = backend_borrow.active_requests.saturating_sub(1);
5190 }
5191 // Retire from streams/prioriser/stream_last_activity_at and
5192 // invalidate expect_write/expect_read if they reference this gid.
5193 self.remove_dead_stream(*stream_id, *global_stream_id);
5194 }
5195
5196 // If no active streams remain, close immediately
5197 if self.streams.is_empty() {
5198 return self.goaway(H2Error::NoError);
5199 }
5200
5201 // Otherwise, let remaining streams (ID <= last_stream_id) complete.
5202 // The connection will be closed when all streams finish.
5203 MuxResult::Continue
5204 }
5205
5206 fn handle_window_update_frame<E, L>(
5207 &mut self,
5208 wu: WindowUpdate,
5209 context: &mut Context<L>,
5210 endpoint: E,
5211 ) -> MuxResult
5212 where
5213 E: Endpoint,
5214 L: ListenerHandler + L7ListenerHandler,
5215 {
5216 let stream_id = wu.stream_id;
5217 let increment = wu.increment;
5218
5219 // RFC 9113 §6.9: increment of 0 MUST be treated as an error.
5220 // Connection-level (stream 0) -> connection error (GOAWAY).
5221 // Stream-level -> stream error (RST_STREAM).
5222 if increment == 0 {
5223 if stream_id == 0 {
5224 error!(
5225 "{} WINDOW_UPDATE with zero increment on connection (stream 0)",
5226 log_context!(self)
5227 );
5228 return self.goaway(H2Error::ProtocolError);
5229 } else {
5230 error!(
5231 "{} WINDOW_UPDATE with zero increment on stream {}",
5232 log_context!(self),
5233 stream_id
5234 );
5235 if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5236 let result = self.reset_stream(
5237 stream_id,
5238 global_stream_id,
5239 context,
5240 endpoint,
5241 H2Error::ProtocolError,
5242 );
5243 self.remove_dead_stream(stream_id, global_stream_id);
5244 return result;
5245 }
5246 // Stream not in map (already closed) — treat as glitch
5247 self.flood_detector.glitch_count += 1;
5248 check_flood_or_return!(self);
5249 self.attribute_bytes_to_overhead();
5250 return MuxResult::Continue;
5251 }
5252 }
5253
5254 // The parser masks the reserved bit (STREAM_ID_MASK), so increment <=
5255 // 2^31-1 and try_from always succeeds. Use try_from rather than `as` to
5256 // guard against a future parser change that drops the mask.
5257 let increment = i32::try_from(increment).unwrap_or(i32::MAX);
5258 if stream_id == 0 {
5259 // Count connection-level WINDOW_UPDATEs before touching the window
5260 // so a per-window flood stops us before we pay the arithmetic cost
5261 // on a million-frame burst. Zero-increment frames short-circuited
5262 // above, so every increment here is a legal-looking rate consumer.
5263 self.flood_detector.window_update_stream0_count = self
5264 .flood_detector
5265 .window_update_stream0_count
5266 .saturating_add(1);
5267 check_flood_or_return!(self);
5268 self.attribute_bytes_to_overhead();
5269 if let Some(window) = self.flow_control.window.checked_add(increment) {
5270 if self.flow_control.window <= 0 && window > 0 {
5271 self.readiness.arm_writable();
5272 }
5273 self.flow_control.window = window;
5274 debug!(
5275 "{} WINDOW_UPDATE received: stream=0 increment={} new_connection_window={}",
5276 log_context!(self),
5277 increment,
5278 self.flow_control.window
5279 );
5280 } else {
5281 error!("{} INVALID WINDOW INCREMENT", log_context!(self));
5282 return self.goaway(H2Error::FlowControlError);
5283 }
5284 } else if let Some(global_stream_id) = self.streams.get(&stream_id).copied() {
5285 let stream = &mut context.streams[global_stream_id];
5286 self.attribute_bytes_to_stream(&mut stream.metrics);
5287 if let Some(window) = stream.window.checked_add(increment) {
5288 if stream.window <= 0 && window > 0 {
5289 self.readiness.arm_writable();
5290 }
5291 stream.window = window;
5292 debug!(
5293 "{} WINDOW_UPDATE received: stream={} increment={} new_stream_window={}",
5294 log_context!(self),
5295 stream_id,
5296 increment,
5297 stream.window
5298 );
5299 } else {
5300 let result = self.reset_stream(
5301 stream_id,
5302 global_stream_id,
5303 context,
5304 endpoint,
5305 H2Error::FlowControlError,
5306 );
5307 self.remove_dead_stream(stream_id, global_stream_id);
5308 return result;
5309 }
5310 } else {
5311 self.attribute_bytes_to_overhead();
5312 trace!(
5313 "{} Ignoring window update on closed stream {}: {}",
5314 log_context!(self),
5315 stream_id,
5316 increment
5317 );
5318 // Pass 3 Low #5: WINDOW_UPDATE on a closed stream is legal
5319 // (RFC 9113 §6.9.1) but has no useful effect, so a peer that
5320 // keeps sending them is wasting our cycles. Count it as a
5321 // glitch so a flood contributes to `check_flood()` and can
5322 // eventually trigger ENHANCE_YOUR_CALM.
5323 self.flood_detector.glitch_count += 1;
5324 check_flood_or_return!(self);
5325 }
5326 MuxResult::Continue
5327 }
5328
5329 fn update_initial_window_size<L>(&mut self, value: u32, context: &mut Context<L>) -> bool
5330 where
5331 L: ListenerHandler + L7ListenerHandler,
5332 {
5333 if value > FLOW_CONTROL_MAX_WINDOW {
5334 return true;
5335 }
5336 let delta = match i32::try_from(
5337 value as i64 - self.peer_settings.settings_initial_window_size as i64,
5338 ) {
5339 Ok(d) => d,
5340 Err(_) => {
5341 error!("{} initial window size delta overflow", log_context!(self));
5342 return true;
5343 }
5344 };
5345 let mut open_window = false;
5346 // Only update windows for streams owned by this connection
5347 for &global_stream_id in self.streams.values() {
5348 let stream = &mut context.streams[global_stream_id];
5349 // RFC 9113 §6.9.2: changes to SETTINGS_INITIAL_WINDOW_SIZE can cause
5350 // stream windows to exceed 2^31-1, which is a flow control error.
5351 match stream.window.checked_add(delta) {
5352 Some(new_window) => {
5353 open_window |= stream.window <= 0 && new_window > 0;
5354 stream.window = new_window;
5355 }
5356 None => return true,
5357 }
5358 }
5359 trace!(
5360 "{} UPDATE INIT WINDOW: {} {} {:?}",
5361 log_context!(self),
5362 delta,
5363 open_window,
5364 self.readiness
5365 );
5366 if open_window {
5367 self.readiness.arm_writable();
5368 }
5369 self.peer_settings.settings_initial_window_size = value;
5370 false
5371 }
5372
5373 pub fn force_disconnect(&mut self) -> MuxResult {
5374 self.state = H2State::Error;
5375 match &mut self.position {
5376 Position::Client(_, _, status) => {
5377 *status = BackendStatus::Disconnecting;
5378 self.readiness.event = Ready::HUP;
5379 debug!(
5380 "{} H2 force_disconnect client: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
5381 log_context!(self),
5382 self.state,
5383 self.streams.len(),
5384 self.expect_write,
5385 self.socket.socket_wants_write(),
5386 self.readiness
5387 );
5388 MuxResult::Continue
5389 }
5390 Position::Server => {
5391 if self.peer_gone_after_final_goaway() {
5392 return MuxResult::CloseSession;
5393 }
5394 // Don't disconnect immediately if rustls still has buffered TLS
5395 // records. Returning CloseSession here triggers shutdown(Write)
5396 // which sends FIN — but any TLS records still in rustls's buffer
5397 // (not yet flushed to the TCP send buffer) are lost, causing the
5398 // client to see "TLS decode error / unexpected eof".
5399 // Instead, keep WRITABLE interest and let the writable path flush.
5400 if self.socket.socket_wants_write() {
5401 debug!(
5402 "{} H2 force_disconnect delaying close: state={:?}, streams={}, expect_write={:?}, wants_write=true, readiness={:?}",
5403 log_context!(self),
5404 self.state,
5405 self.streams.len(),
5406 self.expect_write,
5407 self.readiness
5408 );
5409 self.readiness.interest = Ready::WRITABLE | Ready::HUP | Ready::ERROR;
5410 self.ensure_tls_flushed();
5411 MuxResult::Continue
5412 } else {
5413 debug!(
5414 "{} H2 force_disconnect closing session: state={:?}, streams={}, expect_write={:?}, wants_write=false, readiness={:?}",
5415 log_context!(self),
5416 self.state,
5417 self.streams.len(),
5418 self.expect_write,
5419 self.readiness
5420 );
5421 MuxResult::CloseSession
5422 }
5423 }
5424 }
5425 }
5426
5427 pub fn close<E, L>(&mut self, context: &mut Context<L>, mut endpoint: E)
5428 where
5429 E: Endpoint,
5430 L: ListenerHandler + L7ListenerHandler,
5431 {
5432 match self.position {
5433 Position::Client(_, _, BackendStatus::KeepAlive) => {
5434 error!(
5435 "{} H2 connections do not use KeepAlive backend status",
5436 log_context!(self)
5437 );
5438 return;
5439 }
5440 Position::Client(..) => {}
5441 Position::Server => {
5442 let tls_pending_before = self.socket.socket_wants_write();
5443 if !self.streams.is_empty() || tls_pending_before || self.expect_write.is_some() {
5444 debug!(
5445 "{} H2 close with active state: state={:?}, streams={}, expect_write={:?}, wants_write={}, readiness={:?}",
5446 log_context!(self),
5447 self.state,
5448 self.streams.len(),
5449 self.expect_write,
5450 tls_pending_before,
5451 self.readiness
5452 );
5453 for (stream_id, global_stream_id) in &self.streams {
5454 let stream = &context.streams[*global_stream_id];
5455 debug!(
5456 "{} close stream id={} gid={}: state={:?}, front_eos={}, back_eos={}, front_phase={:?}, back_phase={:?}, front_completed={}, back_completed={}",
5457 log_context!(self),
5458 stream_id,
5459 global_stream_id,
5460 stream.state,
5461 stream.front_received_end_of_stream,
5462 stream.back_received_end_of_stream,
5463 stream.front.parsing_phase,
5464 stream.back.parsing_phase,
5465 stream.front.is_completed(),
5466 stream.back.is_completed()
5467 );
5468 }
5469 }
5470 if !self.close_notify_sent {
5471 trace!("{} H2 SENDING CLOSE NOTIFY", log_context!(self));
5472 }
5473 let (tls_pending_after, drain_rounds) =
5474 drain_tls_close_notify(&mut self.socket, &mut self.close_notify_sent);
5475 if tls_pending_after {
5476 // Severity tiering: key on stream-count + close-state, not
5477 // peer-vs-operator. Composes with the send-side `H2Error`
5478 // variant tier in `goaway()` — both rules demote benign
5479 // paths and keep loss-bearing paths loud.
5480 //
5481 // - `streams != 0` -> `error!`: live streams at
5482 // close time, response-byte loss is possible.
5483 // - `streams == 0` AND state in {GoAway, Error}
5484 // -> `warn!`: idle close after
5485 // a GOAWAY exchange (peer-initiated abort or our own
5486 // graceful drain). What's stranded is best-effort
5487 // GOAWAY/close_notify; no application data was queued.
5488 // - `streams == 0` from any other state
5489 // -> `error!`: unexpected
5490 // teardown path (no GOAWAY exchange) — keep loud so
5491 // unknown failure modes surface.
5492 if !self.streams.is_empty() {
5493 error!(
5494 "{} TLS buffer NOT fully drained on close: \
5495 pending_before={}, pending_after={}, drain_rounds={}, \
5496 state={:?}, streams={}, expect_write={:?}, \
5497 close_notify_sent={}, readiness={:?}",
5498 log_context!(self),
5499 tls_pending_before,
5500 tls_pending_after,
5501 drain_rounds,
5502 self.state,
5503 self.streams.len(),
5504 self.expect_write,
5505 self.close_notify_sent,
5506 self.readiness
5507 );
5508 } else if matches!(self.state, H2State::GoAway | H2State::Error) {
5509 warn!(
5510 "{} TLS buffer NOT fully drained on close: \
5511 pending_before={}, pending_after={}, drain_rounds={}, \
5512 state={:?}, streams={}, expect_write={:?}, \
5513 close_notify_sent={}, readiness={:?}",
5514 log_context!(self),
5515 tls_pending_before,
5516 tls_pending_after,
5517 drain_rounds,
5518 self.state,
5519 self.streams.len(),
5520 self.expect_write,
5521 self.close_notify_sent,
5522 self.readiness
5523 );
5524 } else {
5525 error!(
5526 "{} TLS buffer NOT fully drained on close: \
5527 pending_before={}, pending_after={}, drain_rounds={}, \
5528 state={:?}, streams={}, expect_write={:?}, \
5529 close_notify_sent={}, readiness={:?}",
5530 log_context!(self),
5531 tls_pending_before,
5532 tls_pending_after,
5533 drain_rounds,
5534 self.state,
5535 self.streams.len(),
5536 self.expect_write,
5537 self.close_notify_sent,
5538 self.readiness
5539 );
5540 }
5541 }
5542 return;
5543 }
5544 }
5545 // reconnection is handled by the server for each stream separately
5546 for global_stream_id in self.streams.values() {
5547 trace!("{} end stream: {}", log_context!(self), global_stream_id);
5548 if let StreamState::Linked(token) = context.streams[*global_stream_id].state {
5549 endpoint.end_stream(token, *global_stream_id, context);
5550 }
5551 }
5552 }
5553
5554 /// Reset a stream: tear down kawa state, emit `RST_STREAM` on the wire,
5555 /// and record MadeYouReset accounting.
5556 ///
5557 /// `wire_stream_id` is the on-wire `StreamId`; `stream_id` is the internal
5558 /// `GlobalStreamId` slot. Callers already carry both so we pass them
5559 /// explicitly rather than scanning `self.streams`. The wire id is threaded
5560 /// into [`Self::enqueue_rst`] which queues the frame for serialisation in
5561 /// [`Self::flush_pending_control_frames`] on the next writable tick —
5562 /// independent of whether the caller immediately evicts the slot via
5563 /// `remove_dead_stream` (which they usually do). This is what guarantees
5564 /// the RST reaches the peer for malformed HEADERS / flow-control /
5565 /// content-length violations flagged by h2spec 2.0.
5566 pub fn reset_stream<E, L>(
5567 &mut self,
5568 wire_stream_id: StreamId,
5569 stream_id: GlobalStreamId,
5570 context: &mut Context<L>,
5571 mut endpoint: E,
5572 error: H2Error,
5573 ) -> MuxResult
5574 where
5575 E: Endpoint,
5576 L: ListenerHandler + L7ListenerHandler,
5577 {
5578 // Compute totals before taking mutable borrows on the target stream.
5579 let reset_byte_totals = self.compute_stream_byte_totals(context);
5580 context.unlink_stream(stream_id);
5581 let stream = &mut context.streams[stream_id];
5582 trace!(
5583 "{} reset H2 stream {}: {:#?}",
5584 log_context!(self),
5585 stream_id,
5586 stream.context
5587 );
5588 let old_state = std::mem::replace(&mut stream.state, StreamState::Unlinked);
5589 forcefully_terminate_answer(stream, &mut self.readiness, error);
5590 let linked_token = if let StreamState::Linked(token) = old_state {
5591 Some(token)
5592 } else {
5593 None
5594 };
5595 let (client_rtt, server_rtt) =
5596 Self::snapshot_rtts(&self.position, &self.socket, &endpoint, linked_token);
5597 if let Some(token) = linked_token {
5598 endpoint.end_stream(token, stream_id, context);
5599 }
5600 // Emit access log for server-side resets on streams that had active requests
5601 if self.position.is_server()
5602 && matches!(old_state, StreamState::Link | StreamState::Linked(_))
5603 {
5604 let stream = &mut context.streams[stream_id];
5605 self.distribute_overhead(&mut stream.metrics, reset_byte_totals);
5606 stream.metrics.backend_stop();
5607 stream.generate_access_log(
5608 true,
5609 Some("H2::Reset"),
5610 context.listener.clone(),
5611 client_rtt,
5612 server_rtt,
5613 );
5614 stream.metrics.reset();
5615 }
5616 // Queue the RST for wire emission. Independent of the owning stream
5617 // remaining in `self.streams` — callers typically follow this with
5618 // `remove_dead_stream`, which would otherwise evict the slot before
5619 // `write_streams` could run `kawa.prepare` against the converter.
5620 //
5621 // `enqueue_rst` performs every accounting side-effect at queue
5622 // time (per-error counter, global tx counter, CVE-2025-8671
5623 // MadeYouReset lifetime cap). Graceful `NoError` cancels —
5624 // stream recycle, propagated client-side cancel — are exempt
5625 // from the lifetime cap inside the accounting helper itself.
5626 if let Some(result) = self.enqueue_rst(wire_stream_id, error) {
5627 return result;
5628 }
5629 MuxResult::Continue
5630 }
5631
5632 pub fn end_stream<L>(&mut self, stream_gid: GlobalStreamId, context: &mut Context<L>)
5633 where
5634 L: ListenerHandler + L7ListenerHandler,
5635 {
5636 context.unlink_stream(stream_gid);
5637 let stream_context = context.http_context(stream_gid);
5638 trace!(
5639 "{} end H2 stream {}: {:#?}",
5640 log_context!(self),
5641 stream_gid,
5642 stream_context
5643 );
5644 match self.position {
5645 Position::Client(..) => {
5646 // Resolve the wire StreamId for this gid up front so the
5647 // subsequent cleanup does not hold an iterator borrow on
5648 // `self.streams` while also mutating it.
5649 let wire_stream_id = self
5650 .streams
5651 .iter()
5652 .find_map(|(&sid, &gid)| (gid == stream_gid).then_some(sid));
5653 if let Some(id) = wire_stream_id {
5654 // Only send RST_STREAM if the stream hasn't fully completed.
5655 // If both request and response are terminated, the stream is
5656 // already in "closed" state (RFC 9113 §5.1) — sending RST_STREAM
5657 // on a closed stream would be a protocol error that could cause
5658 // the H2 peer to close the entire connection.
5659 let stream = &context.streams[stream_gid];
5660 let fully_completed =
5661 stream.back_received_end_of_stream && stream.front.is_terminated();
5662 if !fully_completed && !self.rst_sent.contains(&id) {
5663 let kawa = &mut self.zero;
5664 let mut frame = [0; 13];
5665 if let Ok((_, _size)) =
5666 serializer::gen_rst_stream(&mut frame, id, H2Error::Cancel)
5667 {
5668 let buf = kawa.storage.space();
5669 if buf.len() >= frame.len() {
5670 buf[..frame.len()].copy_from_slice(&frame);
5671 kawa.storage.fill(frame.len());
5672 incr!(names::h2::FRAMES_TX_RST_STREAM);
5673 count!(metric_for_rst_stream_sent(H2Error::Cancel), 1);
5674 self.readiness.arm_writable();
5675 self.rst_sent.insert(id);
5676 }
5677 }
5678 }
5679 // Retire the stream and invalidate expect_write/expect_read
5680 // if they still reference this gid — the slot may be popped
5681 // by `shrink_trailing_recycle` on the next create_stream.
5682 self.remove_dead_stream(id, stream_gid);
5683 if context.streams[stream_gid].state != StreamState::Recycle {
5684 context.streams[stream_gid].state = StreamState::Unlinked;
5685 }
5686 return;
5687 }
5688 error!(
5689 "{} end_stream called for unknown global_stream_id {}",
5690 log_context!(self),
5691 stream_gid
5692 );
5693 }
5694 Position::Server => {
5695 let answers_rc = context.listener.borrow().get_answers().clone();
5696 let stream = &mut context.streams[stream_gid];
5697 match end_stream_decision(stream) {
5698 EndStreamAction::ForwardTerminated => {
5699 #[cfg(debug_assertions)]
5700 context
5701 .debug
5702 .push(DebugEvent::Str(format!("Close terminated {stream_gid}")));
5703 debug!(
5704 "{} CLOSING H2 TERMINATED STREAM {} {:?}",
5705 log_context!(self),
5706 stream_gid,
5707 stream
5708 );
5709 stream.state = StreamState::Unlinked;
5710 self.readiness.arm_writable();
5711 context.debug.set_interesting(true);
5712 }
5713 EndStreamAction::CloseDelimited => {
5714 debug!(
5715 "{} CLOSE DELIMITED H2 STREAM {} {:?}",
5716 log_context!(self),
5717 stream_gid,
5718 stream
5719 );
5720 stream.back.push_block(kawa::Block::Flags(kawa::Flags {
5721 end_body: true,
5722 end_chunk: false,
5723 end_header: false,
5724 end_stream: true,
5725 }));
5726 stream.back.parsing_phase = kawa::ParsingPhase::Terminated;
5727 stream.state = StreamState::Unlinked;
5728 self.readiness.arm_writable();
5729 context.debug.set_interesting(true);
5730 }
5731 EndStreamAction::ForwardUnterminated => {
5732 #[cfg(debug_assertions)]
5733 context
5734 .debug
5735 .push(DebugEvent::Str(format!("Close unterminated {stream_gid}")));
5736 debug!(
5737 "{} CLOSING H2 UNTERMINATED STREAM {} {:?}",
5738 log_context!(self),
5739 stream_gid,
5740 stream
5741 );
5742 forcefully_terminate_answer(
5743 stream,
5744 &mut self.readiness,
5745 H2Error::InternalError,
5746 );
5747 context.debug.set_interesting(true);
5748 }
5749 EndStreamAction::SendDefault(status) => {
5750 #[cfg(debug_assertions)]
5751 context.debug.push(DebugEvent::Str(format!(
5752 "Can't retry, send {status} on {stream_gid}"
5753 )));
5754 let answers = answers_rc.borrow();
5755 set_default_answer(stream, &mut self.readiness, status, &answers);
5756 }
5757 EndStreamAction::Reconnect => {
5758 debug!("{} H2 RECONNECT", log_context!(self));
5759 #[cfg(debug_assertions)]
5760 context
5761 .debug
5762 .push(DebugEvent::Str(format!("Retry {stream_gid}")));
5763 stream.state = StreamState::Link;
5764 context.pending_links.push_back(stream_gid);
5765 }
5766 }
5767 }
5768 }
5769 }
5770
5771 pub fn start_stream<L>(&mut self, stream: GlobalStreamId, _context: &mut Context<L>) -> bool
5772 where
5773 L: ListenerHandler + L7ListenerHandler,
5774 {
5775 // RFC 9113 §6.8: reject new streams on a draining connection
5776 if self.drain.draining {
5777 error!(
5778 "{} Cannot open new stream on draining connection (stream {})",
5779 log_context!(self),
5780 stream
5781 );
5782 return false;
5783 }
5784 // RFC 9113 §5.1.2: respect peer's max concurrent streams limit
5785 if self.streams.len() >= self.peer_settings.settings_max_concurrent_streams as usize {
5786 error!(
5787 "{} Cannot open new stream: active={} >= peer max_concurrent_streams={}",
5788 log_context!(self),
5789 self.streams.len(),
5790 self.peer_settings.settings_max_concurrent_streams
5791 );
5792 return false;
5793 }
5794 trace!(
5795 "{} start new H2 stream {} {:?}",
5796 log_context!(self),
5797 stream,
5798 self.readiness
5799 );
5800 let Some(stream_id) = self.new_stream_id() else {
5801 // Pass 4 Medium #5: the client-initiated stream-ID space
5802 // (31 bits, odd only) is exhausted. The backend is now useless
5803 // for new requests — gracefully drain it. Without this
5804 // transition, the Connection lingers in `Connected` state and
5805 // every subsequent request returns 503 because `start_stream`
5806 // keeps returning false.
5807 //
5808 // The session envelope is hoisted to a local because the
5809 // `match &mut self.position` below holds a mutable borrow on
5810 // `self.position`, and `log_context!(self)` reads that field
5811 // for its `position={...}` slot — calling the macro inside the
5812 // match arms would conflict with the active borrow. The
5813 // bidirectional regression guard in `lib/tests/log_layout.rs`
5814 // (and the matching scanner in `lib/build.rs`) recognises this
5815 // shape by scanning backward as well as forward from each log
5816 // call.
5817 let context = log_context!(self);
5818 match &mut self.position {
5819 Position::Client(cluster_id, backend, status) => {
5820 let backend_addr = backend.borrow().address;
5821 let cluster = cluster_id.clone();
5822 info!(
5823 "{} H2 backend stream IDs exhausted (cluster={}, backend={:?}) — draining",
5824 context, cluster, backend_addr
5825 );
5826 *status = BackendStatus::Disconnecting;
5827 }
5828 Position::Server => {
5829 error!(
5830 "{} H2 server stream IDs exhausted — sending graceful GOAWAY",
5831 context
5832 );
5833 }
5834 }
5835 self.graceful_goaway();
5836 return false;
5837 };
5838 self.streams.insert(stream_id, stream);
5839 self.stream_last_activity_at
5840 .insert(stream_id, Instant::now());
5841 self.readiness.arm_writable();
5842 true
5843 }
5844}
5845
5846#[cfg(test)]
5847mod tests {
5848 use std::{cell::RefCell, rc::Rc};
5849
5850 use super::*;
5851 use crate::{pool::Pool, protocol::kawa_h1::editor::HttpContext};
5852
5853 // ── H2FloodDetector ──────────────────────────────────────────────────
5854
5855 #[test]
5856 fn test_flood_detector_no_flood_below_threshold() {
5857 let config = H2FloodConfig::default();
5858 let mut detector = H2FloodDetector::new(config);
5859
5860 // All counters at zero -> no flood
5861 assert!(detector.check_flood().is_none());
5862
5863 // Increment each counter to exactly the threshold (not exceeding)
5864 detector.rst_stream_count = config.max_rst_stream_per_window;
5865 detector.ping_count = config.max_ping_per_window;
5866 detector.settings_count = config.max_settings_per_window;
5867 detector.empty_data_count = config.max_empty_data_per_window;
5868 detector.continuation_count = config.max_continuation_frames;
5869 detector.glitch_count = config.max_glitch_count;
5870 // At threshold but not exceeding -> no flood
5871 assert!(detector.check_flood().is_none());
5872 }
5873
5874 #[test]
5875 fn test_flood_detector_detects_rapid_reset() {
5876 let config = H2FloodConfig::default();
5877 let mut detector = H2FloodDetector::new(config);
5878
5879 detector.rst_stream_count = config.max_rst_stream_per_window + 1;
5880 assert!(matches!(
5881 detector.check_flood(),
5882 Some(H2FloodViolation {
5883 error: H2Error::EnhanceYourCalm,
5884 ..
5885 })
5886 ));
5887 }
5888
5889 #[test]
5890 fn test_flood_detector_detects_ping_flood() {
5891 let config = H2FloodConfig::default();
5892 let mut detector = H2FloodDetector::new(config);
5893
5894 detector.ping_count = config.max_ping_per_window + 1;
5895 assert!(matches!(
5896 detector.check_flood(),
5897 Some(H2FloodViolation {
5898 error: H2Error::EnhanceYourCalm,
5899 ..
5900 })
5901 ));
5902 }
5903
5904 #[test]
5905 fn test_flood_detector_detects_settings_flood() {
5906 let config = H2FloodConfig::default();
5907 let mut detector = H2FloodDetector::new(config);
5908
5909 detector.settings_count = config.max_settings_per_window + 1;
5910 assert!(matches!(
5911 detector.check_flood(),
5912 Some(H2FloodViolation {
5913 error: H2Error::EnhanceYourCalm,
5914 ..
5915 })
5916 ));
5917 }
5918
5919 #[test]
5920 fn test_flood_detector_detects_empty_data_flood() {
5921 let config = H2FloodConfig::default();
5922 let mut detector = H2FloodDetector::new(config);
5923
5924 detector.empty_data_count = config.max_empty_data_per_window + 1;
5925 assert!(matches!(
5926 detector.check_flood(),
5927 Some(H2FloodViolation {
5928 error: H2Error::EnhanceYourCalm,
5929 ..
5930 })
5931 ));
5932 }
5933
5934 #[test]
5935 fn test_flood_detector_detects_continuation_flood() {
5936 let config = H2FloodConfig::default();
5937 let mut detector = H2FloodDetector::new(config);
5938
5939 detector.continuation_count = config.max_continuation_frames + 1;
5940 assert!(matches!(
5941 detector.check_flood(),
5942 Some(H2FloodViolation {
5943 error: H2Error::EnhanceYourCalm,
5944 ..
5945 })
5946 ));
5947 }
5948
5949 #[test]
5950 fn test_flood_detector_detects_header_size_flood() {
5951 let config = H2FloodConfig::default();
5952 let mut detector = H2FloodDetector::new(config);
5953
5954 detector.accumulated_header_size = MAX_HEADER_LIST_SIZE as u32 + 1;
5955 assert!(matches!(
5956 detector.check_flood(),
5957 Some(H2FloodViolation {
5958 error: H2Error::EnhanceYourCalm,
5959 ..
5960 })
5961 ));
5962 }
5963
5964 #[test]
5965 fn test_flood_detector_detects_glitch_flood() {
5966 let config = H2FloodConfig::default();
5967 let mut detector = H2FloodDetector::new(config);
5968
5969 detector.glitch_count = config.max_glitch_count + 1;
5970 assert!(matches!(
5971 detector.check_flood(),
5972 Some(H2FloodViolation {
5973 error: H2Error::EnhanceYourCalm,
5974 ..
5975 })
5976 ));
5977 }
5978
5979 #[test]
5980 fn test_flood_detector_custom_thresholds() {
5981 let config = H2FloodConfig {
5982 max_rst_stream_per_window: 5,
5983 max_ping_per_window: 10,
5984 max_settings_per_window: 3,
5985 max_empty_data_per_window: 8,
5986 max_continuation_frames: 2,
5987 max_glitch_count: 15,
5988 ..H2FloodConfig::default()
5989 };
5990 let mut detector = H2FloodDetector::new(config);
5991
5992 // Below custom threshold -> no flood
5993 detector.rst_stream_count = 5;
5994 assert!(detector.check_flood().is_none());
5995
5996 // Above custom threshold -> flood
5997 detector.rst_stream_count = 6;
5998 assert!(matches!(
5999 detector.check_flood(),
6000 Some(H2FloodViolation {
6001 error: H2Error::EnhanceYourCalm,
6002 ..
6003 })
6004 ));
6005 }
6006
6007 #[test]
6008 fn test_flood_detector_reset_continuation() {
6009 let config = H2FloodConfig::default();
6010 let mut detector = H2FloodDetector::new(config);
6011
6012 detector.continuation_count = 15;
6013 detector.accumulated_header_size = 30000;
6014
6015 detector.reset_continuation();
6016
6017 assert_eq!(detector.continuation_count, 0);
6018 assert_eq!(detector.accumulated_header_size, 0);
6019 }
6020
6021 #[test]
6022 fn test_flood_detector_half_decay_on_window_expiry() {
6023 let config = H2FloodConfig::default();
6024 let mut detector = H2FloodDetector::new(config);
6025
6026 detector.rst_stream_count = 80;
6027 detector.ping_count = 60;
6028 detector.settings_count = 40;
6029 detector.empty_data_count = 20;
6030 detector.window_update_stream0_count = 90;
6031 detector.glitch_count = 50;
6032
6033 // Force window expiry by setting window_start to the past
6034 detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6035
6036 // check_flood calls maybe_reset_window which halves counters
6037 let _ = detector.check_flood();
6038
6039 assert_eq!(detector.rst_stream_count, 40);
6040 assert_eq!(detector.ping_count, 30);
6041 assert_eq!(detector.settings_count, 20);
6042 assert_eq!(detector.empty_data_count, 10);
6043 assert_eq!(detector.window_update_stream0_count, 45);
6044 assert_eq!(detector.glitch_count, 25);
6045 }
6046
6047 #[test]
6048 fn test_flood_detector_window_update_stream0_trips_at_threshold() {
6049 let config = H2FloodConfig {
6050 max_window_update_stream0_per_window: 5,
6051 ..H2FloodConfig::default()
6052 };
6053 let mut detector = H2FloodDetector::new(config);
6054
6055 // At threshold — no flood yet (strict greater-than, matches existing counters).
6056 detector.window_update_stream0_count = 5;
6057 assert!(detector.check_flood().is_none());
6058
6059 // Above threshold — flood with the correct violation reason + metric key.
6060 detector.window_update_stream0_count = 6;
6061 let violation = detector
6062 .check_flood()
6063 .expect("WINDOW_UPDATE stream-0 flood must trip above threshold");
6064 assert_eq!(violation.error, H2Error::EnhanceYourCalm);
6065 assert_eq!(violation.reason, "WINDOW_UPDATE stream 0");
6066 assert_eq!(
6067 violation.metric_key,
6068 "h2.flood.violation.window_update_stream0_window"
6069 );
6070 assert_eq!(violation.count, 6);
6071 assert_eq!(violation.threshold, 5);
6072 }
6073
6074 #[test]
6075 fn test_flood_detector_window_update_stream0_honours_default() {
6076 // Default threshold must match the documented constant so operators
6077 // can reason about behaviour without reading code.
6078 let detector = H2FloodDetector::default();
6079 assert_eq!(
6080 detector.config.max_window_update_stream0_per_window,
6081 DEFAULT_MAX_WINDOW_UPDATE_STREAM0_PER_WINDOW
6082 );
6083 assert_eq!(detector.window_update_stream0_count, 0);
6084 }
6085
6086 #[test]
6087 fn test_flood_detector_decay_prevents_flood() {
6088 let config = H2FloodConfig {
6089 max_rst_stream_per_window: 10,
6090 ..H2FloodConfig::default()
6091 };
6092 let mut detector = H2FloodDetector::new(config);
6093
6094 // Set counter just above threshold
6095 detector.rst_stream_count = 12;
6096
6097 // Without decay -> flood
6098 assert!(matches!(
6099 detector.check_flood(),
6100 Some(H2FloodViolation {
6101 error: H2Error::EnhanceYourCalm,
6102 ..
6103 })
6104 ));
6105
6106 // Reset and simulate window expiry
6107 detector.rst_stream_count = 12;
6108 detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6109
6110 // After decay: 12/2 = 6, which is below threshold 10 -> no flood
6111 assert!(detector.check_flood().is_none());
6112 }
6113
6114 #[test]
6115 fn test_flood_detector_lifetime_rst_cap_triggers_enhance_your_calm() {
6116 // CVE-2023-44487 Rapid Reset: a patient attacker that stays under
6117 // the half-decaying per-window threshold must still be stopped by
6118 // the lifetime cap. Simulate a response-started RST (no abusive
6119 // counter bump) so only the lifetime ceiling is tested.
6120 let mut detector = H2FloodDetector::default();
6121 for _ in 0..DEFAULT_MAX_RST_STREAM_LIFETIME {
6122 assert!(detector.record_rst_lifetime(true).is_none());
6123 }
6124 assert_eq!(
6125 detector.total_rst_received_lifetime,
6126 DEFAULT_MAX_RST_STREAM_LIFETIME
6127 );
6128 assert_eq!(detector.total_abusive_rst_received_lifetime, 0);
6129 // Next RST crosses the ceiling.
6130 assert!(matches!(
6131 detector.record_rst_lifetime(true),
6132 Some(H2FloodViolation {
6133 error: H2Error::EnhanceYourCalm,
6134 ..
6135 })
6136 ));
6137 }
6138
6139 #[test]
6140 fn test_flood_detector_abusive_rst_cap_triggers_first() {
6141 // Pre-response-start RSTs have a much lower ceiling; they trip
6142 // well before the generic lifetime cap.
6143 let mut detector = H2FloodDetector::default();
6144 for _ in 0..DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME {
6145 assert!(detector.record_rst_lifetime(false).is_none());
6146 }
6147 assert_eq!(
6148 detector.total_abusive_rst_received_lifetime,
6149 DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME
6150 );
6151 assert!(matches!(
6152 detector.record_rst_lifetime(false),
6153 Some(H2FloodViolation {
6154 error: H2Error::EnhanceYourCalm,
6155 ..
6156 })
6157 ));
6158 }
6159
6160 #[test]
6161 fn test_flood_detector_emitted_rst_below_threshold_is_clean() {
6162 // Server may legitimately RST some streams (protocol errors,
6163 // client-side abuse caught by other mitigations). Staying at the
6164 // threshold must not trip the ceiling.
6165 let mut detector = H2FloodDetector::default();
6166 for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6167 assert!(detector.record_rst_emitted().is_none());
6168 }
6169 assert_eq!(
6170 detector.total_rst_streams_emitted_lifetime,
6171 DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME
6172 );
6173 }
6174
6175 #[test]
6176 fn test_flood_detector_emitted_rst_cap_triggers_made_you_reset() {
6177 // CVE-2025-8671 MadeYouReset: unbounded server-emitted RST_STREAM is
6178 // a DoS vector equivalent to Rapid Reset with the emission direction
6179 // flipped. Crossing the ceiling must surface a EnhanceYourCalm
6180 // violation so the caller can GOAWAY.
6181 let mut detector = H2FloodDetector::default();
6182 for _ in 0..DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME {
6183 assert!(detector.record_rst_emitted().is_none());
6184 }
6185 let violation = detector
6186 .record_rst_emitted()
6187 .expect("emitting past the cap should produce a violation");
6188 assert!(matches!(
6189 violation,
6190 H2FloodViolation {
6191 error: H2Error::EnhanceYourCalm,
6192 reason: "MadeYouReset: lifetime server-emitted RST_STREAM",
6193 ..
6194 }
6195 ));
6196 assert_eq!(violation.count, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME + 1);
6197 assert_eq!(violation.threshold, DEFAULT_MAX_RST_STREAM_EMITTED_LIFETIME);
6198 }
6199
6200 #[test]
6201 fn test_flood_detector_emitted_rst_counter_does_not_decay() {
6202 // Unlike the windowed rst_stream_count, the emitted lifetime counter
6203 // is strictly monotonic — a patient attacker cannot reset it by
6204 // waiting out a window. maybe_reset_window must NOT touch it.
6205 let mut detector = H2FloodDetector::default();
6206 for _ in 0..10 {
6207 detector.record_rst_emitted();
6208 }
6209 detector.window_start = Instant::now() - FLOOD_WINDOW_DURATION;
6210 // Force a window reset through check_flood.
6211 let _ = detector.check_flood();
6212 assert_eq!(detector.total_rst_streams_emitted_lifetime, 10);
6213 }
6214
6215 /// Every violation kind must carry a metric_key under the agreed
6216 /// `h2.flood.violation.*` namespace, and the keys must be unique. The
6217 /// statsd counter at `handle_flood_violation` reads `violation.metric_key`
6218 /// directly — drift between the construction site and the metric name
6219 /// would silently lose alerting on a CVE mitigation.
6220 #[test]
6221 fn test_flood_violation_metric_keys_are_unique_and_namespaced() {
6222 // Helper: run `record_rst_lifetime` until it trips, returning the metric_key.
6223 fn key_from_rst_lifetime(response_started: bool) -> &'static str {
6224 let mut detector = H2FloodDetector::default();
6225 loop {
6226 if let Some(v) = detector.record_rst_lifetime(response_started) {
6227 return v.metric_key;
6228 }
6229 }
6230 }
6231
6232 // Helper: run `record_rst_emitted` until it trips, returning the metric_key.
6233 fn key_from_rst_emitted() -> &'static str {
6234 let mut detector = H2FloodDetector::default();
6235 loop {
6236 if let Some(v) = detector.record_rst_emitted() {
6237 return v.metric_key;
6238 }
6239 }
6240 }
6241
6242 // Helper: drive a single `check_flood` counter past its threshold.
6243 fn key_from_check_flood(setup: impl FnOnce(&mut H2FloodDetector)) -> &'static str {
6244 let mut detector = H2FloodDetector::default();
6245 setup(&mut detector);
6246 detector
6247 .check_flood()
6248 .expect("setup should always trip a flood")
6249 .metric_key
6250 }
6251
6252 let keys: [&'static str; 12] = [
6253 // Lifetime methods on the detector itself.
6254 key_from_rst_lifetime(true),
6255 key_from_rst_lifetime(false),
6256 key_from_rst_emitted(),
6257 // `check_flood` arms.
6258 key_from_check_flood(|d| d.rst_stream_count = u32::MAX),
6259 key_from_check_flood(|d| d.ping_count = u32::MAX),
6260 key_from_check_flood(|d| d.total_ping_received_lifetime = u32::MAX),
6261 key_from_check_flood(|d| d.settings_count = u32::MAX),
6262 key_from_check_flood(|d| d.total_settings_received_lifetime = u32::MAX),
6263 key_from_check_flood(|d| d.empty_data_count = u32::MAX),
6264 key_from_check_flood(|d| d.continuation_count = u32::MAX),
6265 key_from_check_flood(|d| d.accumulated_header_size = u32::MAX),
6266 key_from_check_flood(|d| d.glitch_count = u32::MAX),
6267 ];
6268
6269 for key in keys {
6270 assert!(
6271 key.starts_with("h2.flood.violation."),
6272 "metric key {key} is missing the h2.flood.violation. prefix",
6273 );
6274 }
6275 let mut deduped = keys.to_vec();
6276 deduped.sort_unstable();
6277 deduped.dedup();
6278 assert_eq!(
6279 deduped.len(),
6280 keys.len(),
6281 "metric keys must be unique across violation kinds; collisions: {keys:?}",
6282 );
6283 }
6284
    /// `h2_frame_rx_metric_key` must yield a distinct `&'static str` per
    /// `Frame::*` variant. The single dispatch site in `handle_frame` reads
    /// from this helper, so a typo or duplicate would silently clobber the
    /// frame-mix dashboard. Asserting the literal set lets us compare against
    /// `doc/configure.md` and the RFC 9113 §6 frame catalogue without
    /// reconstructing every Frame variant in the test.
    ///
    /// Note that the prefix and uniqueness checks run on the literal list
    /// below; the helper itself is only exercised by the `Frame::Unknown`
    /// spot-check at the end.
    #[test]
    fn test_h2_frame_rx_metric_keys_are_unique_and_namespaced() {
        // Update this list whenever a new Frame variant is added — the helper
        // match is also exhaustive, so the build will already break there
        // before anyone notices the test missing a key.
        let expected: [&'static str; 11] = [
            "h2.frames.rx.data",
            "h2.frames.rx.headers",
            "h2.frames.rx.push_promise",
            "h2.frames.rx.priority",
            "h2.frames.rx.rst_stream",
            "h2.frames.rx.settings",
            "h2.frames.rx.ping",
            "h2.frames.rx.goaway",
            "h2.frames.rx.window_update",
            "h2.frames.rx.continuation",
            "h2.frames.rx.unknown",
        ];

        for key in expected {
            assert!(
                key.starts_with("h2.frames.rx."),
                "metric key {key} is missing the h2.frames.rx. prefix",
            );
        }
        // Sorting + dedup shrinks the list if and only if two entries collide.
        let mut deduped = expected.to_vec();
        deduped.sort_unstable();
        deduped.dedup();
        assert_eq!(
            deduped.len(),
            expected.len(),
            "frame-rx metric keys must be unique; collisions in: {expected:?}",
        );

        // Spot-check the helper for the one variant we can construct without
        // borrowing into a frame body — `Frame::Unknown(u8)` is just a tag.
        assert_eq!(
            h2_frame_rx_metric_key(&Frame::Unknown(42)),
            "h2.frames.rx.unknown",
        );
    }

    /// All four `metric_for_*` helpers must yield distinct, namespaced keys for
    /// every RFC 9113 §7 error code. The macro behind them uses `concat!`, so a
    /// new H2Error variant fails the build inside the macro — but a typo in
    /// the helper prefix would silently land. Walk every (direction × kind)
    /// pair and dedupe the set.
    #[test]
    fn test_per_error_code_metric_keys_are_unique_and_namespaced() {
        const ALL_ERRORS: [H2Error; 14] = [
            H2Error::NoError,
            H2Error::ProtocolError,
            H2Error::InternalError,
            H2Error::FlowControlError,
            H2Error::SettingsTimeout,
            H2Error::StreamClosed,
            H2Error::FrameSizeError,
            H2Error::RefusedStream,
            H2Error::Cancel,
            H2Error::CompressionError,
            H2Error::ConnectError,
            H2Error::EnhanceYourCalm,
            H2Error::InadequateSecurity,
            H2Error::HTTP11Required,
        ];

        let mut keys: Vec<&'static str> = Vec::new();
        for error in ALL_ERRORS {
            // The send helpers take the typed error, the receive helpers the
            // raw wire code.
            let code = error as u32;
            keys.push(metric_for_goaway_sent(error));
            keys.push(metric_for_goaway_received(code));
            keys.push(metric_for_rst_stream_sent(error));
            keys.push(metric_for_rst_stream_received(code));
        }
        // …plus the two `unknown_error` fallbacks for codes outside RFC 9113 §7
        // (receive side only: the send helpers take a typed `H2Error`, so they
        // cannot be handed an unknown code).
        let unknown_code = 0xff;
        assert!(H2Error::try_from(unknown_code).is_err());
        keys.push(metric_for_goaway_received(unknown_code));
        keys.push(metric_for_rst_stream_received(unknown_code));
        // …and the dedicated Rapid Reset signature counter.
        keys.push(names::h2::RST_STREAM_RECEIVED_PRE_RESPONSE_START);

        for key in &keys {
            assert!(
                key.starts_with("h2.goaway.sent.")
                    || key.starts_with("h2.goaway.received.")
                    || key.starts_with("h2.rst_stream.sent.")
                    || key.starts_with("h2.rst_stream.received."),
                "metric key {key} does not match a known per-error-code namespace",
            );
        }
        // Sorting + dedup shrinks the list if and only if two keys collide.
        let mut deduped = keys.clone();
        deduped.sort_unstable();
        deduped.dedup();
        assert_eq!(
            deduped.len(),
            keys.len(),
            "per-error-code metric keys must be unique; collisions in: {keys:?}",
        );
    }
6391
#[test]
fn test_flood_detector_response_started_rst_not_abusive() {
    // RST_STREAM frames recorded with `true` (backend response already
    // started) are never reported by `record_rst_lifetime`, even well past
    // the abusive-lifetime threshold: only the generic lifetime counter moves.
    let rst_total = DEFAULT_MAX_RST_STREAM_ABUSIVE_LIFETIME + 100;
    let mut flood = H2FloodDetector::default();
    for _ in 0..rst_total {
        assert!(flood.record_rst_lifetime(true).is_none());
    }
    assert_eq!(flood.total_rst_received_lifetime, rst_total);
    assert_eq!(flood.total_abusive_rst_received_lifetime, 0);
}
6406
#[test]
fn test_flood_detector_default_matches_new_default() {
    // `H2FloodDetector::default()` and `new(H2FloodConfig::default())` must
    // produce detectors with the same configuration and counters.
    let via_default = H2FloodDetector::default();
    let via_new = H2FloodDetector::new(H2FloodConfig::default());

    assert_eq!(via_default.config, via_new.config);
    assert_eq!(via_default.glitch_count, via_new.glitch_count);
    assert_eq!(
        via_default.accumulated_header_size,
        via_new.accumulated_header_size
    );
    assert_eq!(via_default.continuation_count, via_new.continuation_count);
    assert_eq!(via_default.empty_data_count, via_new.empty_data_count);
    assert_eq!(via_default.settings_count, via_new.settings_count);
    assert_eq!(via_default.ping_count, via_new.ping_count);
    assert_eq!(via_default.rst_stream_count, via_new.rst_stream_count);
}
6424
6425 // ── Prioriser ────────────────────────────────────────────────────────
6426
#[test]
fn test_prioriser_defaults_for_unknown_stream() {
    // Nothing was pushed, so every lookup must answer with the RFC 9218
    // defaults: urgency 3, non-incremental.
    let prioriser = Prioriser::default();
    for stream_id in [1, 999] {
        assert_eq!(prioriser.get(&stream_id), (3, false));
    }
}
6434
#[test]
fn test_prioriser_push_rfc9218_and_get() {
    // Each RFC 9218 priority pushed must be accepted (`false` = no protocol
    // error) and read back unchanged for its stream.
    let mut prioriser = Prioriser::default();

    for (stream_id, urgency, incremental) in [(1, 0, true), (3, 7, false)] {
        let rejected = prioriser.push_priority(
            stream_id,
            parser::PriorityPart::Rfc9218 {
                urgency,
                incremental,
            },
        );
        assert!(!rejected);
        assert_eq!(prioriser.get(&stream_id), (urgency, incremental));
    }
}
6459
#[test]
fn test_prioriser_urgency_clamped_to_7() {
    // An out-of-range urgency (255) must read back as 7.
    let mut prioriser = Prioriser::default();
    let out_of_range = parser::PriorityPart::Rfc9218 {
        urgency: 255,
        incremental: false,
    };

    prioriser.push_priority(1, out_of_range);

    assert_eq!(prioriser.get(&1), (7, false));
}
6473
#[test]
fn test_prioriser_update_priority() {
    // Pushing twice for the same stream: the second push overwrites the
    // first, and each value is observable right after its push.
    let mut prioriser = Prioriser::default();

    for (urgency, incremental) in [(3, false), (1, true)] {
        prioriser.push_priority(
            1,
            parser::PriorityPart::Rfc9218 {
                urgency,
                incremental,
            },
        );
        assert_eq!(prioriser.get(&1), (urgency, incremental));
    }
}
6497
#[test]
fn test_prioriser_remove() {
    let mut prioriser = Prioriser::default();
    let urgent_incremental = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };

    // Stored priority is visible...
    prioriser.push_priority(1, urgent_incremental);
    assert_eq!(prioriser.get(&1), (0, true));

    // ...and removing the stream makes lookups fall back to the defaults.
    prioriser.remove(&1);
    assert_eq!(prioriser.get(&1), (3, false));
}
6515
#[test]
fn test_prioriser_rfc7540_self_dependency() {
    let mut prioriser = Prioriser::default();

    // Stream 5 declaring a dependency on stream 5 is invalid, which
    // `push_priority` signals by returning `true`.
    let depends_on_itself = parser::PriorityPart::Rfc7540 {
        stream_dependency: parser::StreamDependency {
            exclusive: false,
            stream_id: 5,
        },
        weight: 16,
    };
    assert!(prioriser.push_priority(5, depends_on_itself));
}
6533
#[test]
fn test_prioriser_rfc7540_valid_dependency() {
    let mut prioriser = Prioriser::default();

    // Stream 5 depending on a different stream (3) is not a protocol error.
    let depends_on_other = parser::PriorityPart::Rfc7540 {
        stream_dependency: parser::StreamDependency {
            exclusive: false,
            stream_id: 3,
        },
        weight: 16,
    };
    assert!(!prioriser.push_priority(5, depends_on_other));

    // The RFC 7540 priority is ignored, so the lookup still yields defaults.
    assert_eq!(prioriser.get(&5), (3, false));
}
6553
#[test]
fn test_prioriser_max_entries_cap() {
    let mut prioriser = Prioriser::default();
    let cap = MAX_PRIORITIES as u32;

    // Push MAX_PRIORITIES entries on distinct odd stream ids.
    for slot in 0..cap {
        prioriser.push_priority(
            slot * 2 + 1,
            parser::PriorityPart::Rfc9218 {
                urgency: (slot % 8) as u8,
                incremental: false,
            },
        );
    }

    // One more *new* stream: the push is not a protocol error, but the
    // entry is silently dropped, so the lookup answers with the defaults.
    let overflow_id = cap * 2 + 1;
    let rejected = prioriser.push_priority(
        overflow_id,
        parser::PriorityPart::Rfc9218 {
            urgency: 0,
            incremental: true,
        },
    );
    assert!(!rejected);
    assert_eq!(prioriser.get(&overflow_id), (3, false));
}
6582
#[test]
fn test_prioriser_update_existing_at_cap() {
    let mut prioriser = Prioriser::default();
    let cap = MAX_PRIORITIES as u32;

    // Push MAX_PRIORITIES entries on distinct odd stream ids.
    for slot in 0..cap {
        let filler = parser::PriorityPart::Rfc9218 {
            urgency: 3,
            incremental: false,
        };
        prioriser.push_priority(slot * 2 + 1, filler);
    }

    // Stream 1 already has an entry: updating it must still be honoured.
    let update = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: true,
    };
    prioriser.push_priority(1, update);
    assert_eq!(prioriser.get(&1), (0, true));
}
6608
#[test]
fn test_prioriser_guarded_accepts_open_stream() {
    // Stream 3 is present in the open-stream map, so its priority is stored.
    let open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(3, 0)]);
    let mut prioriser = Prioriser::default();

    let rejected = prioriser.push_priority_guarded(
        3,
        parser::PriorityPart::Rfc9218 {
            urgency: 1,
            incremental: false,
        },
        7,
        &open_streams,
    );

    assert!(!rejected);
    assert_eq!(prioriser.get(&3), (1, false));
}
6626
#[test]
fn test_prioriser_guarded_accepts_idle_lookahead() {
    // Stream 105 is not open yet but sits just ahead of last_stream_id (99),
    // within PRIORITY_IDLE_LOOKAHEAD, so the priority is kept.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let mut prioriser = Prioriser::default();
    let part = parser::PriorityPart::Rfc9218 {
        urgency: 2,
        incremental: true,
    };

    let rejected = prioriser.push_priority_guarded(105, part, 99, &no_open_streams);

    assert!(!rejected);
    assert_eq!(prioriser.get(&105), (2, true));
}
6644
#[test]
fn test_prioriser_guarded_drops_far_future_stream() {
    // Stream 1_000_001 is far beyond the 64-slot lookahead window that
    // follows last_stream_id (3).
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let mut prioriser = Prioriser::default();
    let part = parser::PriorityPart::Rfc9218 {
        urgency: 0,
        incremental: false,
    };

    let rejected = prioriser.push_priority_guarded(1_000_001, part, 3, &no_open_streams);

    // Dropped without being flagged as a protocol error: nothing is stored,
    // so the lookup answers with the default priority.
    assert!(!rejected);
    assert_eq!(prioriser.get(&1_000_001), (DEFAULT_URGENCY, false));
}
6663
#[test]
fn test_prioriser_guarded_drops_closed_past_stream() {
    // Stream 3 is behind last_stream_id (99) and absent from the open map,
    // i.e. already closed: its priority must be dropped.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let mut prioriser = Prioriser::default();
    let part = parser::PriorityPart::Rfc9218 {
        urgency: 5,
        incremental: false,
    };

    let rejected = prioriser.push_priority_guarded(3, part, 99, &no_open_streams);

    assert!(!rejected);
    assert_eq!(prioriser.get(&3), (DEFAULT_URGENCY, false));
}
6681
#[test]
fn test_prioriser_guarded_cannot_flood_with_far_ids() {
    // Regression guard: an attacker used to be able to fill all
    // MAX_PRIORITIES slots by choosing far-future stream ids. With the guard
    // every such push is rejected up front and nothing is stored.
    let no_open_streams: HashMap<StreamId, GlobalStreamId> = HashMap::new();
    let mut prioriser = Prioriser::default();

    let first_id: u32 = 10_000;
    let end_id = first_id + MAX_PRIORITIES as u32;
    for far_id in first_id..end_id {
        prioriser.push_priority_guarded(
            far_id,
            parser::PriorityPart::Rfc9218 {
                urgency: 0,
                incremental: false,
            },
            0,
            &no_open_streams,
        );
    }

    assert_eq!(prioriser.priorities.len(), 0);
}
6701
6702 // ── RFC 9218 §4 round-robin rotation ───────────────────────────────
6703
6704 /// Helper: mark `stream_id` as (urgency, incremental) in the map.
6705 fn set_prio(p: &mut Prioriser, stream_id: StreamId, urgency: u8, incremental: bool) {
6706 p.push_priority(
6707 stream_id,
6708 parser::PriorityPart::Rfc9218 {
6709 urgency,
6710 incremental,
6711 },
6712 );
6713 }
6714
#[test]
fn test_apply_incremental_rotation_all_non_incremental_is_noop() {
    // With only non-incremental streams the incoming (urgency, stream_id)
    // order is left untouched and zero incremental streams are reported.
    let mut prioriser = Prioriser::default();
    for stream_id in [1, 3, 5] {
        set_prio(&mut prioriser, stream_id, 3, false);
    }

    let mut order = vec![1u32, 3, 5];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 0);
    assert_eq!(order, [1, 3, 5]);
}
6728
#[test]
fn test_apply_incremental_rotation_moves_incremental_to_tail() {
    // Inside one urgency bucket, non-incremental streams are served before
    // incremental ones; each group keeps ascending id order.
    let mut prioriser = Prioriser::default();
    for (stream_id, incremental) in [(1, true), (3, false), (5, true), (7, false)] {
        set_prio(&mut prioriser, stream_id, 3, incremental);
    }

    let mut order = vec![1u32, 3, 5, 7];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 2);
    // Non-incremental (3, 7) first, then incremental (1, 5); the cursor is
    // still at its initial value so no rotation applies.
    assert_eq!(order, [3, 7, 1, 5]);
}
6746
#[test]
fn test_apply_incremental_rotation_respects_urgency_buckets() {
    // Streams of different urgency must never be reordered across buckets.
    let mut prioriser = Prioriser::default();
    let streams = [
        (1, 0, true),  // urgent, incremental
        (3, 3, false), // default urgency, non-incremental
        (5, 3, true),  // default urgency, incremental
        (7, 5, false), // low priority, non-incremental
    ];
    for (stream_id, urgency, incremental) in streams {
        set_prio(&mut prioriser, stream_id, urgency, incremental);
    }

    // The scheduler hands the buffer over already sorted by (urgency, id).
    let mut order = vec![1u32, 3, 5, 7];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 2);
    // Bucket 0 holds only 1, bucket 3 holds 3 (non-inc) then 5 (inc),
    // bucket 5 holds only 7: the overall order is unchanged.
    assert_eq!(order, [1, 3, 5, 7]);
}
6764
#[test]
fn test_apply_incremental_rotation_rotates_by_cursor() {
    // Three incremental streams share one urgency bucket. After each pass
    // the cursor is advanced to the stream that fired, so the next pass
    // must start right after it (wrapping at the end of the bucket).
    let mut prioriser = Prioriser::default();
    for stream_id in [1, 3, 5] {
        set_prio(&mut prioriser, stream_id, 3, true);
    }

    // (expected order for the pass, stream that fires afterwards)
    let passes = [
        ([1u32, 3, 5], Some(1)), // cursor at its initial 0: unchanged order
        ([3, 5, 1], Some(3)),    // cursor 1: start at 3
        ([5, 1, 3], Some(5)),    // cursor 3: start at 5
        ([1, 3, 5], None),       // cursor 5 is the largest id: wrap to 1
    ];

    for (expected, fired) in passes {
        let mut order = vec![1u32, 3, 5];
        assert_eq!(prioriser.apply_incremental_rotation(&mut order), 3);
        assert_eq!(order, expected);
        if fired.is_some() {
            prioriser.advance_incremental_cursor(fired);
        }
    }
}
6799
#[test]
fn test_apply_incremental_rotation_cursor_unknown_id() {
    // The cursor may reference an id that is no longer scheduled (e.g. the
    // stream completed). Rotation then starts at the smallest id above it.
    let mut prioriser = Prioriser::default();
    for stream_id in [3, 5, 7] {
        set_prio(&mut prioriser, stream_id, 3, true);
    }
    // 4 does not belong to the bucket.
    prioriser.advance_incremental_cursor(Some(4));

    let mut order = vec![3u32, 5, 7];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 3);
    assert_eq!(order, [5, 7, 3]);
}
6814
#[test]
fn test_apply_incremental_rotation_single_stream_buckets() {
    // Every urgency bucket holds exactly one stream: nothing to reorder,
    // but the two incremental streams are still counted.
    let mut prioriser = Prioriser::default();
    for (stream_id, urgency, incremental) in [(1, 1, true), (3, 2, false), (5, 3, true)] {
        set_prio(&mut prioriser, stream_id, urgency, incremental);
    }

    let mut order = vec![1u32, 3, 5];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 2);
    assert_eq!(order, [1, 3, 5]);
}
6828
#[test]
fn test_advance_incremental_cursor_none_is_noop() {
    // Advancing with `None` (no incremental stream fired during the pass)
    // must leave the cursor where the previous `Some(5)` put it, so the
    // next pass keeps its fairness position.
    let mut prioriser = Prioriser::default();
    for fired in [Some(5), None] {
        prioriser.advance_incremental_cursor(fired);
    }
    assert_eq!(prioriser.incremental_cursor, 5);
}
6838
#[test]
fn test_apply_incremental_rotation_mixed_bucket_with_cursor() {
    // One urgency bucket mixing both kinds: non-incremental streams go
    // first in ascending order, the incremental tail is rotated by cursor.
    let mut prioriser = Prioriser::default();
    let streams = [(1, true), (3, false), (5, true), (7, false), (9, true)];
    for (stream_id, incremental) in streams {
        set_prio(&mut prioriser, stream_id, 3, incremental);
    }
    prioriser.advance_incremental_cursor(Some(5));

    let mut order = vec![1u32, 3, 5, 7, 9];
    let incremental_count = prioriser.apply_incremental_rotation(&mut order);

    assert_eq!(incremental_count, 3);
    // Non-incremental 3, 7; then the incremental ids starting after the
    // cursor (5): 9, wrapping to 1, and finally 5.
    assert_eq!(order, [3, 7, 9, 1, 5]);
}
6858
6859 // ── H2FlowControl ───────────────────────────────────────────────────
6860
#[test]
fn test_flow_control_initial_state() {
    // A flow-control record built from the default initial window holds
    // 65535 bytes of window, no received bytes and no queued updates.
    let flow = H2FlowControl {
        pending_window_updates: HashMap::new(),
        received_bytes_since_update: 0,
        window: DEFAULT_INITIAL_WINDOW_SIZE as i32,
    };

    assert!(flow.pending_window_updates.is_empty());
    assert_eq!(flow.received_bytes_since_update, 0);
    assert_eq!(flow.window, 65535);
}
6872
#[test]
fn test_flow_control_window_update_coalescing() {
    let mut pending: HashMap<u32, u32> = HashMap::new();

    // Stream 1 gets its first queued increment.
    pending.insert(1, 1000);
    assert_eq!(pending.get(&1).copied(), Some(1000));

    // A second increment for the same stream is merged into the entry.
    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(500).min(i32::MAX as u32));
    assert_eq!(pending.get(&1).copied(), Some(1500));

    // Another stream is tracked independently.
    pending.insert(3, 2000);
    assert_eq!(pending.len(), 2);
    assert_eq!(pending.get(&3).copied(), Some(2000));
}
6892
#[test]
fn test_flow_control_window_update_saturation() {
    // Merging into an entry that sits 100 below the maximum increment must
    // clamp at i32::MAX instead of exceeding it.
    let ceiling = i32::MAX as u32;
    let mut pending: HashMap<u32, u32> = HashMap::from([(1, ceiling - 100)]);

    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(200).min(ceiling));

    assert_eq!(pending.get(&1).copied(), Some(ceiling));
}
6905
#[test]
fn test_flow_control_connection_window_can_go_negative() {
    // RFC 9113 §6.9.2 allows a negative connection-level window, so the
    // `window` field must be able to represent one.
    let mut flow = H2FlowControl {
        pending_window_updates: HashMap::new(),
        received_bytes_since_update: 0,
        window: 100,
    };

    // Consume 200 bytes out of a 100-byte window.
    let consumed = 200;
    flow.window -= consumed;

    assert_eq!(flow.window, -100);
}
6919
6920 // ── H2FloodConfig ───────────────────────────────────────────────────
6921
#[test]
fn test_flood_config_default_values() {
    // Pins the literal defaults of every `H2FloodConfig` threshold.
    let defaults = H2FloodConfig::default();

    // Per-window limits.
    assert_eq!(defaults.max_rst_stream_per_window, 100);
    assert_eq!(defaults.max_ping_per_window, 100);
    assert_eq!(defaults.max_settings_per_window, 50);
    assert_eq!(defaults.max_empty_data_per_window, 100);

    // Header-block and glitch limits.
    assert_eq!(defaults.max_continuation_frames, 20);
    assert_eq!(defaults.max_glitch_count, 100);

    // Connection-lifetime RST_STREAM limits.
    assert_eq!(defaults.max_rst_stream_lifetime, 10_000);
    assert_eq!(defaults.max_rst_stream_abusive_lifetime, 50);

    assert_eq!(defaults.max_header_list_size, MAX_HEADER_LIST_SIZE as u32);
}
6935
6936 // ── distribute_overhead ─────────────────────────────────────────────
6937
#[test]
fn test_distribute_overhead_proportional() {
    // A stream that carried 60% of the bytes in each direction, and is not
    // the last one, is charged 60% of each pending overhead.
    let (mut pending_in, mut pending_out) = (1000, 500);
    let mut session = SessionMetrics::new(None);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        (600, 300),  // stream_bytes
        (1000, 500), // total_bytes
        2,           // active_streams
        false,       // is_last_stream
    );

    // 60% of 1000 and 60% of 500 land on the stream...
    assert_eq!(session.bin, 600);
    assert_eq!(session.bout, 300);
    // ...and the rest stays pending.
    assert_eq!(pending_in, 400);
    assert_eq!(pending_out, 200);
}
6960
#[test]
fn test_distribute_overhead_even_split_when_no_bytes() {
    // With no payload bytes at all there is no ratio to apply: a non-last
    // stream receives an equal share, i.e. overhead / active_streams.
    let (mut pending_in, mut pending_out) = (100, 200);
    let mut session = SessionMetrics::new(None);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        (0, 0), // stream_bytes
        (0, 0), // total_bytes
        4,      // active_streams
        false,  // is_last_stream
    );

    // 100 / 4 and 200 / 4 are attributed, the remainder stays pending.
    assert_eq!(session.bin, 25);
    assert_eq!(session.bout, 50);
    assert_eq!(pending_in, 75);
    assert_eq!(pending_out, 150);
}
6983
#[test]
fn test_distribute_overhead_clamps_to_remaining() {
    // The only (and last) stream carried all the bytes, but just 10 bytes
    // of overhead are pending per direction: it is charged exactly those.
    let (mut pending_in, mut pending_out) = (10, 10);
    let mut session = SessionMetrics::new(None);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        (1000, 1000), // stream_bytes
        (1000, 1000), // total_bytes
        1,            // active_streams
        true,         // is_last_stream
    );

    assert_eq!(session.bin, 10);
    assert_eq!(session.bout, 10);
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 0);
}
7006
#[test]
fn test_distribute_overhead_zero_active_streams() {
    // Edge case: `active_streams == 0` together with `is_last_stream`.
    // The last stream must absorb everything that is still pending.
    let (mut pending_in, mut pending_out) = (100, 100);
    let mut session = SessionMetrics::new(None);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        (0, 0), // stream_bytes
        (0, 0), // total_bytes
        0,      // active_streams
        true,   // is_last_stream
    );

    assert_eq!(session.bin, 100);
    assert_eq!(session.bout, 100);
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 0);
}
7029
#[test]
fn test_distribute_overhead_last_stream_gets_remainder() {
    let (mut pending_in, mut pending_out) = (120, 120);
    let mut first = SessionMetrics::new(None);
    let mut last = SessionMetrics::new(None);

    // A non-last stream takes its proportional share first.
    distribute_overhead(
        &mut first,
        &mut pending_in,
        &mut pending_out,
        (100, 100), // stream_bytes
        (300, 300), // total_bytes
        3,          // active_streams
        false,      // is_last_stream
    );

    // Snapshot what is left before the final stream closes.
    let (left_in, left_out) = (pending_in, pending_out);

    // The last stream must be charged everything left, whatever its ratio.
    distribute_overhead(
        &mut last,
        &mut pending_in,
        &mut pending_out,
        (100, 100), // stream_bytes
        (300, 300), // total_bytes
        3,          // active_streams
        true,       // is_last_stream
    );

    assert_eq!(last.bin, left_in);
    assert_eq!(last.bout, left_out);
    assert_eq!(pending_in, 0, "no remainder bytes should be lost");
    assert_eq!(pending_out, 0, "no remainder bytes should be lost");
}
7067
7068 // ── H2FlowControl (additional edge cases) ─────────────────────────
7069
#[test]
fn test_flow_control_queue_window_update_cap() {
    // The default cap is pinned to 1 + 4 * 100 (one connection-level slot
    // plus four per stream at the default concurrency of 100).
    assert_eq!(DEFAULT_MAX_PENDING_WINDOW_UPDATES, 1 + 100 * 4);

    // Build a stand-in queue holding exactly `cap` distinct stream ids.
    let cap = DEFAULT_MAX_PENDING_WINDOW_UPDATES;
    let queued: HashMap<u32, u32> = (0..cap as u32).map(|stream_id| (stream_id, 1000)).collect();
    assert_eq!(queued.len(), cap);

    // The queue is at capacity and the next stream id is not part of it.
    let overflow_stream = cap as u32;
    assert!(queued.len() >= cap);
    assert!(!queued.contains_key(&overflow_stream));

    // The same formula with 500 concurrent streams gives 2001.
    let scaled_cap = 1 + 500_usize * 4;
    assert_eq!(scaled_cap, 2001);
}
7093
#[test]
fn test_h2_connection_config_defaults() {
    // The default configuration uses the enlarged connection window, the
    // default concurrency limit and a shrink ratio of 2.
    let defaults = H2ConnectionConfig::default();

    assert_eq!(defaults.stream_shrink_ratio, 2);
    assert_eq!(
        defaults.max_concurrent_streams,
        DEFAULT_MAX_CONCURRENT_STREAMS
    );
    assert_eq!(
        defaults.initial_connection_window,
        ENLARGED_CONNECTION_WINDOW
    );
}
7104
#[test]
fn test_h2_connection_config_clamp_window_lower_bound() {
    // A requested window of 100 is below the minimum and must be raised to
    // DEFAULT_INITIAL_WINDOW_SIZE (65535).
    let too_small = 100;
    let clamped = H2ConnectionConfig::new(too_small, 100, 2);
    assert_eq!(
        clamped.initial_connection_window,
        DEFAULT_INITIAL_WINDOW_SIZE
    );
}
7114
#[test]
fn test_h2_connection_config_clamp_window_upper_bound() {
    // A requested window of u32::MAX exceeds the flow-control maximum and
    // must be lowered to FLOW_CONTROL_MAX_WINDOW (2^31-1).
    let too_large = u32::MAX;
    let clamped = H2ConnectionConfig::new(too_large, 100, 2);
    assert_eq!(clamped.initial_connection_window, FLOW_CONTROL_MAX_WINDOW);
}
7121
#[test]
fn test_h2_connection_config_clamp_window_exact_minimum() {
    // Requesting exactly the minimum window leaves it untouched.
    let at_minimum = H2ConnectionConfig::new(DEFAULT_INITIAL_WINDOW_SIZE, 100, 2);
    let window = at_minimum.initial_connection_window;
    assert_eq!(window, DEFAULT_INITIAL_WINDOW_SIZE);

    // The WINDOW_UPDATE increment derived from it is then 0; the sender
    // guards that case with `if increment > 0`.
    let increment = window.saturating_sub(DEFAULT_INITIAL_WINDOW_SIZE);
    assert_eq!(increment, 0);
}
7136
#[test]
fn test_h2_connection_config_clamp_shrink_ratio() {
    // Ratios below 2 are raised to 2 (a ratio of 1 would defeat recycling).
    for requested in [0, 1] {
        let clamped = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, 100, requested);
        assert_eq!(clamped.stream_shrink_ratio, 2);
    }
}
7145
#[test]
fn test_h2_connection_config_clamp_concurrent_streams() {
    // A concurrency limit of 0 is raised to 1.
    let no_streams = 0;
    let clamped = H2ConnectionConfig::new(ENLARGED_CONNECTION_WINDOW, no_streams, 2);
    assert_eq!(clamped.max_concurrent_streams, 1);
}
7151
#[test]
fn test_h2_connection_config_from_optional_uses_defaults() {
    // Leaving every optional knob unset must yield the default config.
    let unset = H2ConnectionConfig::from_optional(None, None, None);
    assert_eq!(unset, H2ConnectionConfig::default());
}
7158
#[test]
fn test_h2_connection_config_from_optional_overrides() {
    // In-range overrides are taken verbatim.
    let overridden = H2ConnectionConfig::from_optional(Some(2_000_000), Some(500), Some(4));

    assert_eq!(overridden.stream_shrink_ratio, 4);
    assert_eq!(overridden.max_concurrent_streams, 500);
    assert_eq!(overridden.initial_connection_window, 2_000_000);
}
7166
#[test]
fn test_flow_control_window_settings_change_negative() {
    // RFC 9113 §6.9.2: lowering SETTINGS_INITIAL_WINDOW_SIZE can push a
    // flow-control window below zero.
    let mut flow = H2FlowControl {
        pending_window_updates: HashMap::new(),
        received_bytes_since_update: 0,
        window: 100,
    };

    // Peer shrinks the initial window from 65535 to 10, so the current
    // window moves by 10 - 65535 = -65525.
    let previous_initial: i32 = DEFAULT_INITIAL_WINDOW_SIZE as i32;
    let updated_initial: i32 = 10;
    flow.window += updated_initial - previous_initial;

    assert!(
        flow.window < 0,
        "Window must be able to go negative after settings change"
    );
    assert_eq!(flow.window, 100 + (10 - 65535));
}
7190
#[test]
fn test_flow_control_coalesce_saturates_at_max_increment() {
    // An entry already at the maximum increment stays there when more is
    // merged into it.
    let ceiling = i32::MAX as u32;
    let mut pending: HashMap<u32, u32> = HashMap::from([(1, ceiling)]);

    pending
        .entry(1)
        .and_modify(|increment| *increment = increment.saturating_add(1000).min(ceiling));

    assert_eq!(pending.get(&1).copied(), Some(ceiling));
}
7203
7204 // ── H2FloodConfig (additional) ───────────────────────────────────
7205
#[test]
fn test_flood_config_default_matches_constants() {
    // Each default threshold must come from its `DEFAULT_MAX_*` constant.
    let defaults = H2FloodConfig::default();

    assert_eq!(defaults.max_glitch_count, DEFAULT_MAX_GLITCH_COUNT);
    assert_eq!(
        defaults.max_continuation_frames,
        DEFAULT_MAX_CONTINUATION_FRAMES
    );
    assert_eq!(
        defaults.max_empty_data_per_window,
        DEFAULT_MAX_EMPTY_DATA_PER_WINDOW
    );
    assert_eq!(
        defaults.max_settings_per_window,
        DEFAULT_MAX_SETTINGS_PER_WINDOW
    );
    assert_eq!(defaults.max_ping_per_window, DEFAULT_MAX_PING_PER_WINDOW);
    assert_eq!(
        defaults.max_rst_stream_per_window,
        DEFAULT_MAX_RST_STREAM_PER_WINDOW
    );
}
7228
#[test]
fn test_flood_config_equality() {
    // Two default configs compare equal...
    let baseline = H2FloodConfig::default();
    let twin = H2FloodConfig::default();
    assert_eq!(baseline, twin);

    // ...while changing a single threshold makes them differ.
    let tightened = H2FloodConfig {
        max_rst_stream_per_window: 1,
        ..H2FloodConfig::default()
    };
    assert_ne!(baseline, tightened);
}
7241
7242 // ── distribute_overhead (additional edge cases) ───────────────────
7243
#[test]
fn test_distribute_overhead_asymmetric_in_out() {
    // The two directions are apportioned independently: this non-last
    // stream carried all inbound bytes (500/500) and no outbound bytes
    // (0/100).
    let (mut pending_in, mut pending_out) = (1000, 1000);
    let mut session = SessionMetrics::new(None);

    distribute_overhead(
        &mut session,
        &mut pending_in,
        &mut pending_out,
        (500, 0),   // stream_bytes
        (500, 100), // total_bytes
        2,          // active_streams
        false,      // is_last_stream
    );

    // Inbound: the whole overhead is charged. Outbound: nothing is.
    assert_eq!(session.bin, 1000);
    assert_eq!(session.bout, 0);
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 1000);
}
7266
#[test]
fn test_distribute_overhead_many_streams_accumulate() {
    let (mut pending_in, mut pending_out) = (120, 120);
    let mut session = SessionMetrics::new(None);

    // Three equal streams are settled one after another into the same
    // metrics; only the third is flagged as the last stream.
    //   call 1: 120 * 100/300 = 40 -> 80 pending
    //   call 2:  80 * 100/300 = 26 -> 54 pending
    //   call 3: last stream takes the remaining 54
    // 40 + 26 + 54 = 120, so nothing is lost to rounding.
    for is_last_stream in [false, false, true] {
        distribute_overhead(
            &mut session,
            &mut pending_in,
            &mut pending_out,
            (100, 100), // stream_bytes
            (300, 300), // total_bytes
            3,          // active_streams
            is_last_stream,
        );
    }

    assert_eq!(session.bin, 120);
    assert_eq!(session.bout, 120);
    // The last stream absorbed the rounding residual.
    assert_eq!(pending_in, 0);
    assert_eq!(pending_out, 0);
}
7298
7299 // ── Hex chunk formatting ────────────────────────────────────────────
7300
/// Checks that formatting a length with `write!(.., "{:x}")` into a
/// `Vec<u8>` (the approach used in handle_data_frame) yields the same bytes
/// as `format!("{:x}")`.
#[test]
fn test_hex_chunk_length_formatting() {
    use std::io::Write as _;

    // Render `len` as lowercase hex into a freshly allocated byte buffer.
    let render = |len: usize| -> Vec<u8> {
        let mut out = Vec::with_capacity(16);
        let _ = write!(out, "{len:x}");
        out
    };

    let fixed_cases: [(usize, &str); 8] = [
        (1, "1"),
        (15, "f"),
        (16, "10"),
        (255, "ff"),
        (256, "100"),
        (4096, "1000"),
        (65535, "ffff"),
        (65536, "10000"),
    ];
    for (payload_len, expected) in fixed_cases {
        assert_eq!(
            render(payload_len),
            expected.as_bytes(),
            "hex formatting mismatch for payload_len={payload_len}"
        );
    }

    // usize::MAX is width-dependent, so its expectation is computed.
    assert_eq!(render(usize::MAX), format!("{:x}", usize::MAX).into_bytes());
}
7333
7334 // ── Stream-ID allocation / exhaustion ──────────────────────────────────
7335
/// On a fresh client connection (`last_stream_id == 0`) the first
/// allocation MUST hand out stream `1` (odd, RFC 9113 §5.1.1) and move the
/// watermark to `2`.
#[test]
fn test_next_stream_id_client_first_allocation() {
    let allocation = next_stream_id(0, true).expect("fresh client must allocate");
    assert_eq!(allocation, (1, 2));
}
7345
/// Successive client allocations must produce strictly increasing odd
/// identifiers (1, 3, 5, ...), as RFC 9113 §5.1.1 requires.
#[test]
fn test_next_stream_id_client_sequence_is_odd_and_monotonic() {
    let mut watermark = 0u32;
    let mut previous: Option<u32> = None;
    let mut allocated = Vec::with_capacity(8);

    for _ in 0..8 {
        let (id, advanced) = next_stream_id(watermark, true).expect("unexhausted");
        assert_eq!(id & 1, 1, "client stream ids must be odd (RFC 9113 §5.1.1)");
        if let Some(prev) = previous {
            assert!(id > prev);
        }
        previous = Some(id);
        allocated.push(id);
        watermark = advanced;
    }

    assert_eq!(allocated, [1, 3, 5, 7, 9, 11, 13, 15]);
}
7361
/// Server-side allocation yields even identifiers. The helper
/// convention is `watermark - 2` for server, `watermark - 1` for client,
/// where `watermark` is the value returned as the new `last_stream_id`, so
/// both sides share the same monotonically-increasing even watermark.
/// Sōzu never server-pushes, but the helper must be symmetric so push
/// could be enabled without a regression.
#[test]
fn test_next_stream_id_server_is_even() {
    // `last = 2` is the watermark left behind by an earlier allocation. This
    // call advances it to 4 and hands the server `4 - 2 = 2`.
    // NOTE(review): by the same arithmetic a fresh `last = 0` would presumably
    // yield the reserved id 0 — an artefact of the shared watermark that only
    // matters in tests, since the server never allocates. Confirm against
    // `next_stream_id`.
    let (first_id, watermark) = next_stream_id(2, false).expect("server allocation");
    assert_eq!(first_id & 1, 0, "server stream ids must be even");
    assert_eq!(watermark, 4);
    assert_eq!(first_id, 2);

    // Chaining from the returned watermark yields the next even id.
    let (second_id, watermark) = next_stream_id(watermark, false).expect("second slot");
    assert_eq!(second_id, 4);
    assert_eq!(second_id & 1, 0);
    assert_eq!(watermark, 6);
}
7382
/// `STREAM_ID_MAX = 0x7FFF_FFFF` is the last odd stream ID a client can
/// issue. Handing it out moves the watermark to `STREAM_ID_MAX + 1 = 2³¹`,
/// so the caller passes `last = STREAM_ID_MAX - 1 = 0x7FFF_FFFE`. That call
/// MUST succeed with the max ID, leaving the watermark at `2³¹` — the
/// sentinel value from which every further call fails.
#[test]
fn test_next_stream_id_client_final_slot_allocates() {
    let allocation = next_stream_id(STREAM_ID_MAX - 1, true).expect("final slot still allocates");
    let (final_id, watermark) = allocation;
    assert_eq!(final_id, STREAM_ID_MAX);
    assert_eq!(watermark, STREAM_ID_MAX + 1);

    // From the sentinel watermark the helper MUST refuse instead of wrapping.
    let exhausted = next_stream_id(watermark, true);
    assert!(exhausted.is_none());
}
7397
/// Exhaustion case: once the client has issued stream ID `STREAM_ID_MAX`,
/// the watermark sits at `STREAM_ID_MAX + 1`. The next request MUST return
/// `None` — without this guard the `watermark - 1` client convention would
/// issue `STREAM_ID_MAX + 2 = 0x8000_0001`, which (a) sets the reserved
/// high bit of the 31-bit stream identifier and (b) would alias stream 1
/// if that bit were masked off.
#[test]
fn test_next_stream_id_client_exhausted_returns_none() {
    let last = STREAM_ID_MAX + 1;
    assert!(next_stream_id(last, true).is_none());
}
7408
/// Defence in depth for a watermark that has somehow jumped close to
/// `u32::MAX`: advancing it must neither panic nor wrap around — the
/// helper has to answer `None` for both of the topmost values.
#[test]
fn test_next_stream_id_saturates_near_u32_max() {
    for last in [u32::MAX, u32::MAX - 1] {
        assert!(next_stream_id(last, true).is_none());
    }
}
7417
/// Mirror of the client exhaustion check for the even (server) id space:
/// from the sentinel watermark `STREAM_ID_MAX + 1` nothing may be issued.
#[test]
fn test_next_stream_id_server_exhausted_returns_none() {
    let sentinel_watermark = STREAM_ID_MAX + 1;
    let allocation = next_stream_id(sentinel_watermark, false);
    assert!(allocation.is_none());
}
7424
/// Off-by-one sweep around the boundary: for every watermark within four
/// of `STREAM_ID_MAX`, and for both roles, an id that is handed out must
/// never be larger than `STREAM_ID_MAX`.
#[test]
fn test_next_stream_id_never_exceeds_stream_id_max() {
    let lowest = STREAM_ID_MAX - 4;
    let highest = STREAM_ID_MAX + 4;
    for last in lowest..=highest {
        for is_client in [true, false] {
            // Refusals are fine here; only successful allocations are checked.
            let Some((issued, next)) = next_stream_id(last, is_client) else {
                continue;
            };
            assert!(
                issued <= STREAM_ID_MAX,
                "issued id {issued} exceeds STREAM_ID_MAX (last={last}, is_client={is_client})"
            );
            // The post-allocation watermark is allowed to land on
            // STREAM_ID_MAX + 1; when it leaves the valid range the
            // follow-up call has to report exhaustion.
            if next <= STREAM_ID_MAX {
                continue;
            }
            assert!(
                next_stream_id(next, is_client).is_none(),
                "second call after final slot must report exhaustion"
            );
        }
    }
}
7450
/// `is_client` must partition the id space: for one and the same
/// `last_stream_id`, the client id is odd, the server id is even, and the
/// two are exactly 1 apart — so both roles on one connection can never
/// be handed the same id.
#[test]
fn test_next_stream_id_client_server_parities_disjoint() {
    let watermarks = [0u32, 2, 4, 10, 100, 1_000_000, STREAM_ID_MAX - 3];
    for last in watermarks {
        let client_id = next_stream_id(last, true).unwrap().0;
        let server_id = next_stream_id(last, false).unwrap().0;
        assert_eq!(client_id & 1, 1);
        assert_eq!(server_id & 1, 0);
        assert_eq!(client_id.abs_diff(server_id), 1);
    }
}
7464
7465 // ── LIFECYCLE §9 invariant 16: any_stream_id_matches ─────────────────
7466 //
7467 // Covers the iteration dispatch used by `any_stream_has_pending_back`.
7468 // Testing the probe directly against a synthetic closure keeps the
7469 // tests independent of the full `Stream` fixture (which requires a
7470 // `Pool` and a fully-built `HttpContext`).
7471
#[test]
fn test_any_stream_id_matches_empty_map_is_false() {
    // With no entries, even an always-true probe cannot match anything.
    let no_streams = HashMap::<StreamId, GlobalStreamId>::new();
    let matched = any_stream_id_matches(&no_streams, |_| true);
    assert!(!matched);
}
7477
#[test]
fn test_any_stream_id_matches_all_probe_false_is_false() {
    // Three entries, but the probe rejects every one of them.
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1), (5, 2)]);
    let matched = any_stream_id_matches(&streams, |_| false);
    assert!(!matched);
}
7486
#[test]
fn test_any_stream_id_matches_any_probe_true_is_true() {
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1), (5, 2)]);
    // Only the entry mapped to GlobalStreamId 1 (StreamId 3) satisfies
    // the probe; a single hit is enough.
    let matched = any_stream_id_matches(&streams, |gid| gid == 1);
    assert!(matched);
}
7496
#[test]
fn test_any_stream_id_matches_single_entry() {
    // One entry: the probe sees its GlobalStreamId (7), nothing else.
    let streams: HashMap<StreamId, GlobalStreamId> = HashMap::from([(42, 7)]);
    let hit = any_stream_id_matches(&streams, |gid| gid == 7);
    let miss = any_stream_id_matches(&streams, |gid| gid == 8);
    assert!(hit);
    assert!(!miss);
}
7504
#[test]
fn test_any_stream_id_matches_short_circuits() {
    let streams: HashMap<StreamId, GlobalStreamId> =
        HashMap::from([(1, 0), (3, 1), (5, 2), (7, 3)]);

    // Count how often the probe is consulted while always answering `true`.
    let mut probe_invocations = 0usize;
    let matched = any_stream_id_matches(&streams, |_| {
        probe_invocations += 1;
        true
    });

    assert!(matched);
    // `Iterator::any` short-circuits on the first `true` — with four
    // candidate entries the probe must therefore have run just once.
    assert_eq!(probe_invocations, 1);
}
7522
7523 // ── LIFECYCLE §9 invariant 16: any_stream_has_pending_back ───────────
7524
7525 /// Build a minimal `Stream` for invariant-16 probing. Uses the pool
7526 /// plumbing so `back.blocks` / `back.out` exist; every other field is
7527 /// default-valued because the predicate only reads the back buffer.
7528 fn make_stream_for_invariant_16(pool: &Rc<RefCell<Pool>>, session_ulid: Ulid) -> Stream {
7529 let http_ctx = HttpContext {
7530 keep_alive_backend: true,
7531 keep_alive_frontend: true,
7532 sticky_session_found: None,
7533 method: None,
7534 authority: None,
7535 path: None,
7536 status: None,
7537 reason: None,
7538 user_agent: None,
7539 x_request_id: None,
7540 xff_chain: None,
7541 #[cfg(feature = "opentelemetry")]
7542 otel: None,
7543 closing: false,
7544 session_id: session_ulid,
7545 id: Ulid::generate(),
7546 backend_id: None,
7547 cluster_id: None,
7548 protocol: Protocol::HTTPS,
7549 public_address: "127.0.0.1:0".parse().unwrap(),
7550 session_address: None,
7551 sticky_name: String::new(),
7552 sticky_session: None,
7553 backend_address: None,
7554 tls_server_name: None,
7555 tls_cert_names: None,
7556 strict_sni_binding: false,
7557 elide_x_real_ip: false,
7558 send_x_real_ip: false,
7559 tls_version: None,
7560 tls_cipher: None,
7561 tls_alpn: None,
7562 sozu_id_header: String::from("Sozu-Id"),
7563 redirect_location: None,
7564 www_authenticate: None,
7565 original_authority: None,
7566 headers_response: Vec::new(),
7567 retry_after_seconds: None,
7568 frontend_redirect_template: None,
7569 redirect_status: None,
7570 access_log_message: None,
7571 };
7572 Stream::new(Rc::downgrade(pool), http_ctx, 65_535)
7573 .expect("pool should have capacity for two buffers")
7574 }
7575
7576 fn make_pool_for_invariant_16() -> Rc<RefCell<Pool>> {
7577 // Two buffer slots per stream (front + back), ten stream slots is
7578 // plenty for the tests below.
7579 Rc::new(RefCell::new(Pool::with_capacity(4, 20, 16_384)))
7580 }
7581
#[test]
fn test_any_stream_has_pending_back_empty_map_is_false() {
    let pool = make_pool_for_invariant_16();
    // A stream exists in the context, but no StreamId maps to it.
    let context_streams = vec![make_stream_for_invariant_16(&pool, Ulid::generate())];
    let no_mappings = HashMap::<StreamId, GlobalStreamId>::new();
    let pending = any_stream_has_pending_back(&no_mappings, &context_streams);
    assert!(!pending);
}
7590
#[test]
fn test_any_stream_has_pending_back_all_drained_is_false() {
    let pool = make_pool_for_invariant_16();
    let session = Ulid::generate();
    // Two streams belonging to the same session, both mapped.
    let context_streams: Vec<Stream> = (0..2)
        .map(|_| make_stream_for_invariant_16(&pool, session))
        .collect();
    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0), (3, 1)]);

    // Both freshly-built streams have empty back.out and back.blocks
    // (Kawa::new starts with empty deques), so nothing is pending.
    let pending = any_stream_has_pending_back(&streams_map, &context_streams);
    assert!(!pending);
}
7606
#[test]
fn test_any_stream_has_pending_back_unknown_gid_is_false() {
    // LIFECYCLE invariant 16 defence-in-depth: a `GlobalStreamId` that
    // is not (or no longer) present — e.g. during a stream-removal race —
    // must not panic; the `.get()` lookup must short-circuit to `false`.
    let pool = make_pool_for_invariant_16();
    let context_streams = vec![make_stream_for_invariant_16(&pool, Ulid::generate())];
    // The slice holds one stream, so GlobalStreamId 42 is out of range.
    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(7, 42)]);

    let pending = any_stream_has_pending_back(&streams_map, &context_streams);
    assert!(!pending);
}
7620
#[test]
fn test_any_stream_has_pending_back_with_pending_blocks_is_true() {
    let pool = make_pool_for_invariant_16();
    let mut stream = make_stream_for_invariant_16(&pool, Ulid::generate());
    // One placeholder block is enough — the variant is irrelevant, the
    // predicate only checks `blocks.is_empty()`.
    stream.back.blocks.push_back(kawa::Block::StatusLine);

    let context_streams = [stream];
    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0)]);
    let pending = any_stream_has_pending_back(&streams_map, &context_streams);
    assert!(pending);
}
7633
#[test]
fn test_any_stream_has_pending_back_with_pending_out_is_true() {
    let pool = make_pool_for_invariant_16();
    let mut stream = make_stream_for_invariant_16(&pool, Ulid::generate());
    // Leave `blocks` empty and queue bytes in `out` only.
    let leftover = kawa::OutBlock::Store(kawa::Store::Static(b"partial frame"));
    stream.back.out.push_back(leftover);

    let context_streams = [stream];
    let streams_map: HashMap<StreamId, GlobalStreamId> = HashMap::from([(1, 0)]);
    let pending = any_stream_has_pending_back(&streams_map, &context_streams);
    assert!(pending);
}
7648
// ── ready_incremental_by_urgency mid-pass consistency ────────────────
//
// The full RED is in e2e and currently #[ignore]'d (timing-sensitive).
// The scalar logic below pins the saturating_sub + bucket-scoped
// decrement contract the scheduler at h2.rs:2412-2414 + h2.rs:2481
// relies on: a same-urgency transition-to-ineligible MUST drop the
// per-bucket count by exactly 1 and never underflow the counter
// (modelled here as a `usize`).
7656
/// Build an urgency → count map from `(urgency, count)` pairs. When an
/// urgency appears more than once, the last pair wins.
fn make_bucket(counts: &[(u8, usize)]) -> HashMap<u8, usize> {
    let mut bucket = HashMap::new();
    for &(urgency, count) in counts {
        bucket.insert(urgency, count);
    }
    bucket
}
7660
#[test]
fn ready_incremental_bucket_decrement_reduces_same_urgency_only() {
    let mut buckets = make_bucket(&[(1, 3), (3, 2)]);
    let (urgency, is_incremental): (u8, bool) = (1, true);

    // Model an incremental stream at urgency 1 becoming ineligible in the
    // middle of a pass: only its own bucket may be decremented.
    if let (true, Some(count)) = (is_incremental, buckets.get_mut(&urgency)) {
        *count = count.saturating_sub(1);
    }

    assert_eq!(buckets.get(&1), Some(&2), "urgency-1 bucket must drop to 2");
    assert_eq!(buckets.get(&3), Some(&2), "urgency-3 bucket untouched");
}
7675
#[test]
fn ready_incremental_bucket_decrement_saturates_at_zero() {
    // The urgency-0 bucket starts out already empty.
    let mut buckets = make_bucket(&[(0, 0)]);
    let urgency = 0u8;

    if let Some(slot) = buckets.get_mut(&urgency) {
        let decremented = slot.saturating_sub(1);
        *slot = decremented;
    }

    assert_eq!(
        buckets.get(&0).copied(),
        Some(0),
        "saturating_sub must not underflow"
    );
}
7685
#[test]
fn ready_incremental_bucket_decrement_skipped_for_non_incremental() {
    let mut buckets = make_bucket(&[(1, 3)]);
    let is_incremental = false;

    // The decrement is gated on the incremental flag, so with the flag
    // off the bucket must come out unchanged.
    if let (true, Some(count)) = (is_incremental, buckets.get_mut(&1)) {
        *count = count.saturating_sub(1);
    }

    let remaining = buckets.get(&1);
    assert_eq!(
        remaining,
        Some(&3),
        "non-incremental transitions must not touch the bucket"
    );
}
7701
7702 // ── enqueue_rst: queue / dedupe / counter / arm invariants ───────────
7703 //
7704 // `enqueue_rst_into` is the free-function primitive shared by all three
7705 // RST push sites (DATA-on-closed, refuse_stream_and_discard,
7706 // reset_stream). The method delegates; the invariants live here.
7707
#[test]
fn test_enqueue_rst_into_populates_queue_and_dedupe() {
    let mut rst_queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total = 0usize;
    let mut rst_sent: HashSet<StreamId> = HashSet::new();
    let mut readiness = Readiness::new();

    // First reset for stream 5: must be queued and reported as fresh.
    let first_attempt = enqueue_rst_into(
        &mut rst_queue,
        &mut queued_total,
        &mut rst_sent,
        &mut readiness,
        5,
        H2Error::ProtocolError,
    );
    assert!(first_attempt, "first call must report freshly_queued = true");

    // A repeat for the same stream — even with another error code — has
    // to be a no-op AND return false, so that the accounting done in
    // `Self::enqueue_rst` skips it.
    let repeat_attempt = enqueue_rst_into(
        &mut rst_queue,
        &mut queued_total,
        &mut rst_sent,
        &mut readiness,
        5,
        H2Error::InternalError,
    );
    assert!(
        !repeat_attempt,
        "second call for same stream must return freshly_queued = false"
    );

    assert_eq!(rst_queue.len(), 1, "dedupe must collapse to a single entry");
    assert_eq!(
        rst_queue.first(),
        Some(&(5, H2Error::ProtocolError)),
        "the first error wins — second push is ignored"
    );
    assert_eq!(queued_total, 1, "queued-cap counter must bump exactly once");
    assert!(rst_sent.contains(&5), "rst_sent must record the id");
}
7748
#[test]
fn test_enqueue_rst_into_bumps_total_for_distinct_ids() {
    let mut rst_queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total = 0usize;
    let mut rst_sent: HashSet<StreamId> = HashSet::new();
    let mut readiness = Readiness::new();

    // Four different stream ids: none of them may be deduplicated.
    let distinct_ids = [1u32, 3, 5, 7];
    for stream_id in distinct_ids {
        enqueue_rst_into(
            &mut rst_queue,
            &mut queued_total,
            &mut rst_sent,
            &mut readiness,
            stream_id,
            H2Error::ProtocolError,
        );
    }

    // Queue, counter and dedupe set must all have grown in lockstep.
    assert_eq!(rst_queue.len(), 4);
    assert_eq!(queued_total, 4);
    assert_eq!(rst_sent.len(), 4);
}
7771
#[test]
fn test_enqueue_rst_into_arms_writable_in_invariant_15_form() {
    let mut readiness = Readiness::new();
    // Precondition: a fresh `Readiness` has no WRITABLE bit raised.
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());

    let mut rst_queue: Vec<(StreamId, H2Error)> = Vec::new();
    let mut queued_total = 0usize;
    let mut rst_sent: HashSet<StreamId> = HashSet::new();
    enqueue_rst_into(
        &mut rst_queue,
        &mut queued_total,
        &mut rst_sent,
        &mut readiness,
        9,
        H2Error::FlowControlError,
    );

    // Postcondition (invariant 15): WRITABLE is raised on `interest` as
    // well as on `event`, so that the next tick runs `writable()` under
    // edge-triggered epoll.
    let interest_armed = readiness.interest.is_writable();
    let event_armed = readiness.event.is_writable();
    assert!(interest_armed, "arm_writable must raise the interest bit");
    assert!(
        event_armed,
        "arm_writable must raise the event bit (edge-triggered epoll)"
    );
}
7804
#[test]
fn test_enqueue_rst_into_dedupe_does_not_rearm_writable() {
    // Dedupe is a pure short-circuit: if the stream id is already in
    // `rst_sent`, we do not touch the readiness. This matters because
    // a re-entrant reset_stream call during a cascading error path
    // would otherwise re-raise WRITABLE unnecessarily — harmless but
    // noisy in metrics.
    let mut pending: Vec<(StreamId, H2Error)> = Vec::new();
    let mut total: usize = 0;
    let mut sent: HashSet<StreamId> = HashSet::new();
    // Pre-seed the dedupe set so the very first call hits the short-circuit.
    sent.insert(11);
    let mut readiness = Readiness::new();

    let freshly_queued = enqueue_rst_into(
        &mut pending,
        &mut total,
        &mut sent,
        &mut readiness,
        11,
        H2Error::ProtocolError,
    );

    // The return value is what callers use for accounting, so it has to
    // be pinned for the pre-seeded case as well.
    assert!(
        !freshly_queued,
        "already-sent ids must report freshly_queued = false"
    );
    assert!(
        pending.is_empty(),
        "already-sent ids must not queue a second frame"
    );
    assert_eq!(total, 0);
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());
}
7835
7836 // ── forcefully_terminate_answer arms WRITABLE for ET epoll ───────────
7837 //
7838 // Gap A in the h2spec diagnosis: the pre-fix code set `interest` but
7839 // never raised `event`, so `filter_interest() = event & interest` was
7840 // zero and `writable()` was never scheduled. This test pins the fix.
7841
#[test]
fn test_forcefully_terminate_answer_arms_event_and_interest() {
    let mut readiness = Readiness::new();
    // Precondition: nothing WRITABLE on a fresh `Readiness`.
    assert!(!readiness.interest.is_writable());
    assert!(!readiness.event.is_writable());

    let pool = make_pool_for_invariant_16();
    let mut stream = make_stream_for_invariant_16(&pool, Ulid::generate());
    forcefully_terminate_answer(&mut stream, &mut readiness, H2Error::ProtocolError);

    // Both halves have to be armed: `interest` alone is not enough.
    let interest_armed = readiness.interest.is_writable();
    let event_armed = readiness.event.is_writable();
    assert!(
        interest_armed,
        "forcefully_terminate_answer must set the WRITABLE interest bit"
    );
    assert!(
        event_armed,
        "forcefully_terminate_answer must set the WRITABLE event bit — \
         without this, filter_interest() = 0 under edge-triggered epoll \
         and writable() is never scheduled (h2spec Gap A)"
    );
}
7865}