reddb_server/server/header_escape_guard.rs
1//! `HeaderEscapeGuard` — typed boundary guard for HTTP response header values.
2//!
3//! Per ADR 0010 (`docs/adr/0010-serialization-boundary-discipline.md`)
4//! and issue #176, the producing side of every serialization boundary
5//! is owned by a typed guard whose only job is to know the boundary's
6//! escape contract. This module is the guard for HTTP response header
7//! values.
8//!
9//! ## Why this guard exists
10//!
11//! HTTP/1.1 frames headers as `name: value\r\n` pairs terminated by a
12//! double `\r\n`. If a header value contains a raw CR or LF, an
13//! attacker can splice a second header (or the entire body) past the
14//! original framing — the classic CRLF-injection / response-splitting
15//! shape called out by the Whiz / Babeld disclosure (March 2026).
16//!
17//! ## Contract
18//!
19//! `HeaderEscapeGuard::header_value(s)` returns a typed
20//! `http::HeaderValue` if and only if `s` is safe for an HTTP/1.1
21//! response header value:
22//!
23//! - No CR (`\r`) or LF (`\n`) — these terminate the header line.
24//! - No NUL (`\0`) — proxies and intermediaries truncate on NUL.
25//! - No tab (`\t`) — RFC 7230 admits HTAB inside header values, but
26//! it is the most common smuggling lever for downstream log
27//! pipelines that split on whitespace, and there is no legitimate
28//! producer-side reason for RedDB to emit one.
29//! - No other ASCII control byte (0x00–0x1F, 0x7F).
30//! - Bounded length: 8 KiB ceiling per value. Real HTTP intermediaries
31//! start dropping connections well before this; the guard rejects
32//! early so a misuse becomes a typed error, not a runtime hang.
33//!
34//! Non-ASCII bytes (0x80–0xFF) are *permitted* — RFC 7230 §3.2.6
35//! discourages them but does not forbid them, and `http::HeaderValue`
36//! accepts them. Producers should emit ASCII; the guard does not
37//! police that.
38//!
39//! ## Failure mode
40//!
41//! Every rejection path returns a typed `EscapeError`. Callers must
42//! propagate the error to the HTTP boundary — the guard never silently
43//! truncates, replaces, or escapes-around a control byte. Silent
44//! mangling at this layer is the exact failure shape ADR 0010 is
45//! designed to prevent.
46//!
47//! ## Out of scope
48//!
49//! - Header *names*. RedDB sets header names from `&'static str`
50//! literals only; the names live in source code, not in user input.
51//! If a future surface admits user-supplied header names, that
52//! needs its own guard.
53//! - Request-side headers. Inbound parsing already happens in
54//! `transport::HttpRequest::read_from`; the inbound parser is a
55//! separate concern.
56
57use std::fmt;
58
59use http::HeaderValue;
60
/// Upper bound, in bytes, on the length of a single header value.
///
/// The ceiling is generous enough for realistic values (URLs, JWTs,
/// Set-Cookie payloads with attributes) while ensuring that a misuse —
/// such as an attacker pushing megabytes through a header — surfaces
/// as a typed error long before it eats memory or stalls the
/// connection. 8 KiB matches the `request headers too large` ceiling
/// that `HttpRequest::read_from` already enforces for inbound
/// headers, so the inbound and outbound limits stay symmetric.
pub const MAX_HEADER_VALUE_BYTES: usize = 8 << 10; // 8 KiB
72
/// Why `HeaderEscapeGuard::header_value` refused a string.
///
/// Every variant identifies the byte class (or the length violation)
/// behind the rejection, so callers can build a meaningful 4xx / 500
/// response and the audit log receives a structured diagnostic rather
/// than a hand-formatted string. `ContainsNonPrintable` carries the
/// offending byte itself, which helps debug logs and lets tests
/// assert that the guard tripped on the right byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EscapeError {
    /// A CR (`\r`) or LF (`\n`) was present in the value.
    ContainsCrlf,
    /// A NUL (`\0`) was present in the value.
    ContainsNull,
    /// An HTAB (`\t`) was present in the value.
    ContainsTab,
    /// Some other non-printable ASCII byte was present
    /// (0x01–0x08, 0x0B, 0x0C, 0x0E–0x1F, or 0x7F). The payload is
    /// that byte, kept for diagnostic clarity.
    ContainsNonPrintable(u8),
    /// The value is longer than [`MAX_HEADER_VALUE_BYTES`]. The
    /// payload is the observed length, so the caller can quote it in
    /// the error reply.
    OversizeForBoundary(usize),
}
98
99impl fmt::Display for EscapeError {
100 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101 match self {
102 Self::ContainsCrlf => {
103 f.write_str("header value contains CR or LF (would smuggle a second header line)")
104 }
105 Self::ContainsNull => f.write_str(
106 "header value contains NUL (proxies and intermediaries truncate on NUL)",
107 ),
108 Self::ContainsTab => f.write_str(
109 "header value contains TAB (downstream log pipelines split on whitespace)",
110 ),
111 Self::ContainsNonPrintable(b) => {
112 write!(f, "header value contains non-printable byte 0x{b:02X}")
113 }
114 Self::OversizeForBoundary(n) => write!(
115 f,
116 "header value length {n} exceeds the {MAX_HEADER_VALUE_BYTES}-byte boundary limit"
117 ),
118 }
119 }
120}
121
122impl std::error::Error for EscapeError {}
123
/// Typed guard for HTTP response header values.
///
/// The struct is zero-sized; it exists for the namespace and for
/// future extensions (e.g., per-boundary length policies). Callers
/// invoke the guard exclusively through associated functions.
///
/// The common traits (`Debug`, `Clone`, `Copy`, `Default`) are derived
/// so the guard can be embedded in other derived types and shows up
/// in debug output like any other public type.
///
/// ```ignore
/// use crate::server::header_escape_guard::{HeaderEscapeGuard, EscapeError};
///
/// let value = HeaderEscapeGuard::header_value("max-age=3600")?;
/// // value is now an `http::HeaderValue` safe to attach to a
/// // response. Attempting to splice a second header line is
/// // rejected at the type boundary:
/// assert!(matches!(
///     HeaderEscapeGuard::header_value("evil\r\nX-Forged: 1"),
///     Err(EscapeError::ContainsCrlf),
/// ));
/// # Ok::<(), EscapeError>(())
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct HeaderEscapeGuard;
144
145impl HeaderEscapeGuard {
146 /// Validate `s` and wrap it in a typed `http::HeaderValue`.
147 ///
148 /// Returns the typed error (`EscapeError`) on the first byte that
149 /// violates the contract. The order of checks is: oversize →
150 /// CRLF → NUL → TAB → other non-printable. Callers must not
151 /// assume the order — only that some violation triggered the
152 /// rejection.
153 pub fn header_value(s: &str) -> Result<HeaderValue, EscapeError> {
154 let bytes = s.as_bytes();
155 if bytes.len() > MAX_HEADER_VALUE_BYTES {
156 return Err(EscapeError::OversizeForBoundary(bytes.len()));
157 }
158 for &b in bytes {
159 match b {
160 b'\r' | b'\n' => return Err(EscapeError::ContainsCrlf),
161 0 => return Err(EscapeError::ContainsNull),
162 b'\t' => return Err(EscapeError::ContainsTab),
163 // Other ASCII control bytes: 0x01–0x08, 0x0B, 0x0C,
164 // 0x0E–0x1F, plus DEL (0x7F).
165 0x01..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x7F => {
166 return Err(EscapeError::ContainsNonPrintable(b));
167 }
168 _ => {}
169 }
170 }
171 // SAFETY-equivalent: every byte we accepted is a printable
172 // ASCII byte, a space, or 0x80..=0xFF — all of which
173 // `HeaderValue::from_bytes` accepts. The construction can
174 // only fail for the exact bytes we already rejected, so an
175 // error here is unreachable in well-formed code; we surface
176 // it as the closest typed error rather than panicking so a
177 // future tightening of `http`'s rules degrades gracefully.
178 HeaderValue::from_bytes(bytes).map_err(|_| EscapeError::ContainsNonPrintable(0))
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    // --- Happy paths --------------------------------------------------

    #[test]
    fn accepts_simple_ascii() {
        let v = HeaderEscapeGuard::header_value("application/json").unwrap();
        assert_eq!(v.as_bytes(), b"application/json");
    }

    #[test]
    fn accepts_empty_string() {
        // RFC 7230 §3.2 admits empty header values.
        let v = HeaderEscapeGuard::header_value("").unwrap();
        assert_eq!(v.as_bytes(), b"");
    }

    #[test]
    fn accepts_value_with_spaces_and_punctuation() {
        let v = HeaderEscapeGuard::header_value("text/html; charset=utf-8, q=0.9").unwrap();
        assert_eq!(v.as_bytes(), b"text/html; charset=utf-8, q=0.9");
    }

    #[test]
    fn accepts_max_length_value() {
        let s = "a".repeat(MAX_HEADER_VALUE_BYTES);
        let v = HeaderEscapeGuard::header_value(&s).unwrap();
        assert_eq!(v.as_bytes().len(), MAX_HEADER_VALUE_BYTES);
    }

    #[test]
    fn accepts_high_bit_bytes() {
        // 0x80..=0xFF are discouraged by RFC 7230 but not forbidden,
        // and `http::HeaderValue` accepts them. The guard mirrors
        // `http`'s policy so we don't second-guess the upstream.
        let v = HeaderEscapeGuard::header_value("café").unwrap();
        assert_eq!(v.as_bytes(), "café".as_bytes());
    }

    // --- Rejection paths ---------------------------------------------

    #[test]
    fn rejects_carriage_return() {
        assert_eq!(
            HeaderEscapeGuard::header_value("evil\rinjected"),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_line_feed() {
        assert_eq!(
            HeaderEscapeGuard::header_value("evil\ninjected"),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_crlf_pair_for_response_splitting() {
        // The classic response-splitting shape: terminate the
        // current header, splice a second header, splice a body.
        let payload = "ok\r\nX-Forged: 1\r\n\r\n<html>pwned</html>";
        assert_eq!(
            HeaderEscapeGuard::header_value(payload),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_nul() {
        assert_eq!(
            HeaderEscapeGuard::header_value("trunc\0ate"),
            Err(EscapeError::ContainsNull)
        );
    }

    #[test]
    fn rejects_tab() {
        assert_eq!(
            HeaderEscapeGuard::header_value("split\tlog"),
            Err(EscapeError::ContainsTab)
        );
    }

    #[test]
    fn rejects_backspace() {
        assert_eq!(
            HeaderEscapeGuard::header_value("over\u{0008}type"),
            Err(EscapeError::ContainsNonPrintable(0x08))
        );
    }

    #[test]
    fn rejects_bell() {
        assert_eq!(
            HeaderEscapeGuard::header_value("ding\u{0007}!"),
            Err(EscapeError::ContainsNonPrintable(0x07))
        );
    }

    #[test]
    fn rejects_form_feed() {
        assert_eq!(
            HeaderEscapeGuard::header_value("page\u{000C}break"),
            Err(EscapeError::ContainsNonPrintable(0x0C))
        );
    }

    #[test]
    fn rejects_vertical_tab() {
        assert_eq!(
            HeaderEscapeGuard::header_value("vert\u{000B}tab"),
            Err(EscapeError::ContainsNonPrintable(0x0B))
        );
    }

    #[test]
    fn rejects_escape_byte() {
        assert_eq!(
            HeaderEscapeGuard::header_value("\u{001B}[31mred"),
            Err(EscapeError::ContainsNonPrintable(0x1B))
        );
    }

    #[test]
    fn rejects_del_byte() {
        assert_eq!(
            HeaderEscapeGuard::header_value("hello\u{007F}"),
            Err(EscapeError::ContainsNonPrintable(0x7F))
        );
    }

    #[test]
    fn rejects_oversize() {
        let s = "a".repeat(MAX_HEADER_VALUE_BYTES + 1);
        assert_eq!(
            HeaderEscapeGuard::header_value(&s),
            Err(EscapeError::OversizeForBoundary(MAX_HEADER_VALUE_BYTES + 1))
        );
    }

    #[test]
    fn oversize_check_runs_before_byte_scan() {
        // Even a value full of CRLFs reports as oversize when it
        // also exceeds the length cap. Cheap test that fixes the
        // observable order; if a future refactor flips the order
        // we want a deliberate signal.
        let mut s = String::with_capacity(MAX_HEADER_VALUE_BYTES + 4);
        for _ in 0..(MAX_HEADER_VALUE_BYTES / 2 + 1) {
            s.push_str("\r\n");
        }
        let n = s.len();
        assert_eq!(
            HeaderEscapeGuard::header_value(&s),
            Err(EscapeError::OversizeForBoundary(n))
        );
    }

    #[test]
    fn first_offending_byte_decides_error() {
        // The byte scan is positional, not class-prioritised: when a
        // value holds several forbidden bytes, the one that appears
        // first in the input determines the reported variant.
        assert_eq!(
            HeaderEscapeGuard::header_value("a\tb\r\n"),
            Err(EscapeError::ContainsTab)
        );
        assert_eq!(
            HeaderEscapeGuard::header_value("a\0b\t"),
            Err(EscapeError::ContainsNull)
        );
        assert_eq!(
            HeaderEscapeGuard::header_value("a\u{0007}b\n"),
            Err(EscapeError::ContainsNonPrintable(0x07))
        );
    }

    // --- Error display formatting ------------------------------------

    #[test]
    fn error_display_mentions_byte_class() {
        assert!(EscapeError::ContainsCrlf.to_string().contains("CR or LF"));
        assert!(EscapeError::ContainsNull.to_string().contains("NUL"));
        assert!(EscapeError::ContainsTab.to_string().contains("TAB"));
        assert!(EscapeError::ContainsNonPrintable(0x07)
            .to_string()
            .contains("0x07"));
        assert!(EscapeError::OversizeForBoundary(99_999)
            .to_string()
            .contains("99999"));
    }

    // --- Snapshot of escaped output for known fixtures ---------------
    //
    // Per issue #176 acceptance criteria. We don't pull in `insta` for
    // a single snapshot; the assertion is inline so it survives a
    // refactor without depending on a dev-only crate.

    #[test]
    fn snapshot_known_fixtures() {
        // (input, expected outcome). Order is documentation: each
        // line shows a known-shape attacker string and the verdict
        // the guard must return.
        let cases: &[(&str, Result<&[u8], EscapeError>)] = &[
            ("application/json", Ok(b"application/json")),
            (
                "max-age=31536000; includeSubDomains",
                Ok(b"max-age=31536000; includeSubDomains"),
            ),
            ("nosniff", Ok(b"nosniff")),
            ("DENY", Ok(b"DENY")),
            ("\"abc-123\"", Ok(b"\"abc-123\"")),
            ("evil\r\nLocation: /pwned", Err(EscapeError::ContainsCrlf)),
            ("set-cookie\nset-cookie", Err(EscapeError::ContainsCrlf)),
            (
                "bell\x07alarm",
                Err(EscapeError::ContainsNonPrintable(0x07)),
            ),
            ("trunc\0ate", Err(EscapeError::ContainsNull)),
            ("split\there", Err(EscapeError::ContainsTab)),
        ];
        for (input, expected) in cases {
            let got = HeaderEscapeGuard::header_value(input);
            match (expected, &got) {
                (Ok(bytes), Ok(v)) => {
                    assert_eq!(v.as_bytes(), *bytes, "input {input:?} produced wrong bytes")
                }
                (Err(want), Err(got_err)) => {
                    assert_eq!(want, got_err, "input {input:?} produced wrong error")
                }
                (Ok(_), Err(e)) => panic!("input {input:?} unexpectedly rejected: {e:?}"),
                (Err(want), Ok(v)) => panic!(
                    "input {input:?} unexpectedly accepted (bytes={:?}); wanted {want:?}",
                    v.as_bytes()
                ),
            }
        }
    }

    // --- Byte-level fuzz / proptest-style coverage --------------------
    //
    // The `proptest` crate is a dev-dep at the workspace root. The
    // assertion shape we want is small enough that we hand-roll a
    // deterministic byte-level sweep here rather than pull `proptest`
    // into this module, keeping the test fast and reproducible.

    #[test]
    fn fuzz_every_single_byte_position() {
        // Inserting any rejected byte at any position in an
        // otherwise-clean value — including before the first byte
        // and after the last — must trigger the typed error for that
        // byte class.
        const CLEAN: &[u8] = b"abcdefghij";
        for byte in 0u8..=0x1F {
            let want = match byte {
                b'\r' | b'\n' => EscapeError::ContainsCrlf,
                0 => EscapeError::ContainsNull,
                b'\t' => EscapeError::ContainsTab,
                _ => EscapeError::ContainsNonPrintable(byte),
            };
            for pos in 0..=CLEAN.len() {
                let mut bytes = CLEAN.to_vec();
                bytes.insert(pos, byte);
                let s = String::from_utf8(bytes).unwrap();
                let got = HeaderEscapeGuard::header_value(&s);
                assert_eq!(got, Err(want), "byte 0x{byte:02X} at pos {pos}");
            }
        }
        // DEL (0x7F) is the only rejected byte outside 0x00..=0x1F.
        for pos in 0..=CLEAN.len() {
            let mut bytes = CLEAN.to_vec();
            bytes.insert(pos, 0x7F);
            let s = String::from_utf8(bytes).unwrap();
            assert_eq!(
                HeaderEscapeGuard::header_value(&s),
                Err(EscapeError::ContainsNonPrintable(0x7F)),
                "DEL at pos {pos}"
            );
        }
    }

    #[test]
    fn fuzz_every_printable_ascii_accepted() {
        for byte in 0x20u8..0x7F {
            let s = format!("x{}y", byte as char);
            assert!(
                HeaderEscapeGuard::header_value(&s).is_ok(),
                "byte 0x{byte:02X} should be accepted",
            );
        }
    }

    #[test]
    fn fuzz_every_high_bit_byte_accepted() {
        // The guard takes `&str`, so high-bit bytes can only reach it
        // as part of valid UTF-8: continuation bytes 0x80..=0xBF and
        // lead bytes 0xC2..=0xF4 (0xC0, 0xC1 and 0xF5..=0xFF never
        // occur in valid UTF-8). Sweep U+0080..=U+00FF exhaustively,
        // then sample every 64th code point up to U+10FFFF so each of
        // those byte values shows up at least once, and check that
        // the bytes round-trip exactly as the input UTF-8.
        let mut seen = [false; 256];
        let code_points = (0x80u32..=0xFF).chain((0x100u32..=0x10_FFFF).step_by(0x40));
        // `char::from_u32` drops the surrogate range, which has no
        // UTF-8 encoding.
        for ch in code_points.filter_map(char::from_u32) {
            let s = ch.to_string();
            let v = HeaderEscapeGuard::header_value(&s).unwrap();
            assert_eq!(v.as_bytes(), s.as_bytes(), "U+{:04X}", ch as u32);
            for &b in s.as_bytes() {
                seen[usize::from(b)] = true;
            }
        }
        // Guard against the sweep silently losing coverage.
        for b in (0x80u8..=0xBF).chain(0xC2..=0xF4) {
            assert!(seen[usize::from(b)], "byte 0x{b:02X} was never exercised");
        }
    }

    #[test]
    fn fuzz_oversize_boundary() {
        // The exact boundary is accepted; one byte past is rejected.
        let exact = "a".repeat(MAX_HEADER_VALUE_BYTES);
        assert!(HeaderEscapeGuard::header_value(&exact).is_ok());
        let over = "a".repeat(MAX_HEADER_VALUE_BYTES + 1);
        assert_eq!(
            HeaderEscapeGuard::header_value(&over),
            Err(EscapeError::OversizeForBoundary(MAX_HEADER_VALUE_BYTES + 1))
        );
    }

    #[test]
    fn fuzz_concatenation_attacks() {
        // The shape the Whiz / Babeld disclosure made famous:
        // suffix a control sequence after a benign-looking prefix.
        let trailers = [
            "\r\n",
            "\n",
            "\r",
            "\r\nX-Forged: 1",
            "\r\nLocation: http://attacker/",
            "\r\n\r\n<html>",
        ];
        for trailer in trailers {
            let payload = format!("application/json{trailer}");
            assert_eq!(
                HeaderEscapeGuard::header_value(&payload),
                Err(EscapeError::ContainsCrlf),
                "payload {payload:?} must reject"
            );
        }
    }
}
500}