Skip to main content

reddb_server/server/
header_escape_guard.rs

1//! `HeaderEscapeGuard` — typed boundary guard for HTTP response header values.
2//!
3//! Per ADR 0010 (`docs/adr/0010-serialization-boundary-discipline.md`)
4//! and issue #176, the producing side of every serialization boundary
5//! is owned by a typed guard whose only job is to know the boundary's
6//! escape contract. This module is the guard for HTTP response header
7//! values.
8//!
9//! ## Why this guard exists
10//!
11//! HTTP/1.1 frames headers as `name: value\r\n` pairs terminated by a
12//! double `\r\n`. If a header value contains a raw CR or LF, an
13//! attacker can splice a second header (or the entire body) past the
14//! original framing — the classic CRLF-injection / response-splitting
15//! shape called out by the Whiz / Babeld disclosure (March 2026).
16//!
17//! ## Contract
18//!
19//! `HeaderEscapeGuard::header_value(s)` returns a typed
20//! `http::HeaderValue` if and only if `s` is safe for an HTTP/1.1
21//! response header value:
22//!
23//! - No CR (`\r`) or LF (`\n`) — these terminate the header line.
24//! - No NUL (`\0`) — proxies and intermediaries truncate on NUL.
25//! - No tab (`\t`) — RFC 7230 admits HTAB inside header values, but
26//!   it is the most common smuggling lever for downstream log
27//!   pipelines that split on whitespace, and there is no legitimate
28//!   producer-side reason for RedDB to emit one.
29//! - No other ASCII control byte (0x00–0x1F, 0x7F).
30//! - Bounded length: 8 KiB ceiling per value. Real HTTP intermediaries
31//!   start dropping connections well before this; the guard rejects
32//!   early so a misuse becomes a typed error, not a runtime hang.
33//!
34//! Non-ASCII bytes (0x80–0xFF) are *permitted* — RFC 7230 §3.2.6
35//! discourages them but does not forbid them, and `http::HeaderValue`
36//! accepts them. Producers should emit ASCII; the guard does not
37//! police that.
38//!
39//! ## Failure mode
40//!
41//! Every rejection path returns a typed `EscapeError`. Callers must
42//! propagate the error to the HTTP boundary — the guard never silently
43//! truncates, replaces, or escapes-around a control byte. Silent
44//! mangling at this layer is the exact failure shape ADR 0010 is
45//! designed to prevent.
46//!
47//! ## Out of scope
48//!
49//! - Header *names*. RedDB sets header names from `&'static str`
50//!   literals only; the names live in source code, not in user input.
51//!   If a future surface admits user-supplied header names, that
52//!   needs its own guard.
53//! - Request-side headers. Inbound parsing already happens in
54//!   `transport::HttpRequest::read_from`; the inbound parser is a
55//!   separate concern.
56
57use std::fmt;
58
59use http::HeaderValue;
60
/// Upper bound, in bytes, on a single outbound header value (8 KiB).
///
/// The ceiling leaves ample room for realistic values (URLs, JWT
/// tokens, `Set-Cookie` payloads with attributes) while making sure
/// a misuse — for example an attacker pushing megabytes through a
/// header — is reported as a typed error well before it consumes
/// memory or stalls the connection. The figure mirrors the
/// `request headers too large` ceiling that `HttpRequest::read_from`
/// applies to inbound headers, so both directions share one limit.
pub const MAX_HEADER_VALUE_BYTES: usize = 8_192;
72
/// Why `HeaderEscapeGuard::header_value` refused a candidate value.
///
/// There is one variant per rejected byte class, plus one for the
/// length ceiling, so callers can build a useful 4xx / 500 response
/// and the audit log receives a structured diagnostic instead of a
/// hand-formatted string. `ContainsNonPrintable` carries the
/// offending byte itself, which is handy for debug logs and for
/// tests that assert the guard caught the right byte.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EscapeError {
    /// A CR (`\r`) or LF (`\n`) byte was present.
    ContainsCrlf,
    /// A NUL (`\0`) byte was present.
    ContainsNull,
    /// An HTAB (`\t`) byte was present.
    ContainsTab,
    /// Some other non-printable ASCII byte was present
    /// (0x01–0x08, 0x0B, 0x0C, 0x0E–0x1F, or 0x7F). The payload is
    /// that byte, kept for diagnostic clarity.
    ContainsNonPrintable(u8),
    /// The value is longer than [`MAX_HEADER_VALUE_BYTES`]. The
    /// payload is the observed length, so the caller can echo it in
    /// the error reply.
    OversizeForBoundary(usize),
}
98
99impl fmt::Display for EscapeError {
100    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101        match self {
102            Self::ContainsCrlf => {
103                f.write_str("header value contains CR or LF (would smuggle a second header line)")
104            }
105            Self::ContainsNull => f.write_str(
106                "header value contains NUL (proxies and intermediaries truncate on NUL)",
107            ),
108            Self::ContainsTab => f.write_str(
109                "header value contains TAB (downstream log pipelines split on whitespace)",
110            ),
111            Self::ContainsNonPrintable(b) => {
112                write!(f, "header value contains non-printable byte 0x{b:02X}")
113            }
114            Self::OversizeForBoundary(n) => write!(
115                f,
116                "header value length {n} exceeds the {MAX_HEADER_VALUE_BYTES}-byte boundary limit"
117            ),
118        }
119    }
120}
121
122impl std::error::Error for EscapeError {}
123
/// Typed guard for HTTP response header values.
///
/// The struct is zero-sized; it exists for the namespace and for
/// future extensions (e.g., per-boundary length policies). Callers
/// invoke the guard exclusively through associated functions.
///
/// `Debug`, `Clone` and `Copy` are derived so the guard can be
/// embedded in types that derive those traits themselves and shows
/// up sensibly in diagnostics; the derives cost nothing for a unit
/// struct.
///
/// ```ignore
/// use crate::server::header_escape_guard::{HeaderEscapeGuard, EscapeError};
///
/// let value = HeaderEscapeGuard::header_value("max-age=3600")?;
/// // value is now an `http::HeaderValue` safe to attach to a
/// // response. Attempting to splice a second header line is
/// // rejected at the type boundary:
/// assert!(matches!(
///     HeaderEscapeGuard::header_value("evil\r\nX-Forged: 1"),
///     Err(EscapeError::ContainsCrlf),
/// ));
/// # Ok::<(), EscapeError>(())
/// ```
#[derive(Debug, Clone, Copy)]
pub struct HeaderEscapeGuard;
144
145impl HeaderEscapeGuard {
146    /// Validate `s` and wrap it in a typed `http::HeaderValue`.
147    ///
148    /// Returns the typed error (`EscapeError`) on the first byte that
149    /// violates the contract. The order of checks is: oversize →
150    /// CRLF → NUL → TAB → other non-printable. Callers must not
151    /// assume the order — only that some violation triggered the
152    /// rejection.
153    pub fn header_value(s: &str) -> Result<HeaderValue, EscapeError> {
154        let bytes = s.as_bytes();
155        if bytes.len() > MAX_HEADER_VALUE_BYTES {
156            return Err(EscapeError::OversizeForBoundary(bytes.len()));
157        }
158        for &b in bytes {
159            match b {
160                b'\r' | b'\n' => return Err(EscapeError::ContainsCrlf),
161                0 => return Err(EscapeError::ContainsNull),
162                b'\t' => return Err(EscapeError::ContainsTab),
163                // Other ASCII control bytes: 0x01–0x08, 0x0B, 0x0C,
164                // 0x0E–0x1F, plus DEL (0x7F).
165                0x01..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x7F => {
166                    return Err(EscapeError::ContainsNonPrintable(b));
167                }
168                _ => {}
169            }
170        }
171        // SAFETY-equivalent: every byte we accepted is a printable
172        // ASCII byte, a space, or 0x80..=0xFF — all of which
173        // `HeaderValue::from_bytes` accepts. The construction can
174        // only fail for the exact bytes we already rejected, so an
175        // error here is unreachable in well-formed code; we surface
176        // it as the closest typed error rather than panicking so a
177        // future tightening of `http`'s rules degrades gracefully.
178        HeaderValue::from_bytes(bytes).map_err(|_| EscapeError::ContainsNonPrintable(0))
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    // --- Happy paths --------------------------------------------------

    #[test]
    fn accepts_simple_ascii() {
        let v = HeaderEscapeGuard::header_value("application/json").unwrap();
        assert_eq!(v.as_bytes(), b"application/json");
    }

    #[test]
    fn accepts_empty_string() {
        // RFC 7230 §3.2 admits empty header values.
        let v = HeaderEscapeGuard::header_value("").unwrap();
        assert_eq!(v.as_bytes(), b"");
    }

    #[test]
    fn accepts_value_with_spaces_and_punctuation() {
        let v = HeaderEscapeGuard::header_value("text/html; charset=utf-8, q=0.9").unwrap();
        assert_eq!(v.as_bytes(), b"text/html; charset=utf-8, q=0.9");
    }

    #[test]
    fn accepts_max_length_value() {
        let s = "a".repeat(MAX_HEADER_VALUE_BYTES);
        let v = HeaderEscapeGuard::header_value(&s).unwrap();
        assert_eq!(v.as_bytes().len(), MAX_HEADER_VALUE_BYTES);
    }

    #[test]
    fn accepts_high_bit_bytes() {
        // 0x80..=0xFF are discouraged by RFC 7230 but not forbidden,
        // and `http::HeaderValue` accepts them. The guard mirrors
        // `http`'s policy so we don't second-guess the upstream.
        let v = HeaderEscapeGuard::header_value("café").unwrap();
        assert_eq!(v.as_bytes(), "café".as_bytes());
    }

    // --- Rejection paths ---------------------------------------------

    #[test]
    fn rejects_carriage_return() {
        assert_eq!(
            HeaderEscapeGuard::header_value("evil\rinjected"),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_line_feed() {
        assert_eq!(
            HeaderEscapeGuard::header_value("evil\ninjected"),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_crlf_pair_for_response_splitting() {
        // The classic response-splitting shape: terminate the
        // current header, splice a second header, splice a body.
        let payload = "ok\r\nX-Forged: 1\r\n\r\n<html>pwned</html>";
        assert_eq!(
            HeaderEscapeGuard::header_value(payload),
            Err(EscapeError::ContainsCrlf)
        );
    }

    #[test]
    fn rejects_nul() {
        assert_eq!(
            HeaderEscapeGuard::header_value("trunc\0ate"),
            Err(EscapeError::ContainsNull)
        );
    }

    #[test]
    fn rejects_tab() {
        assert_eq!(
            HeaderEscapeGuard::header_value("split\tlog"),
            Err(EscapeError::ContainsTab)
        );
    }

    #[test]
    fn rejects_backspace() {
        assert_eq!(
            HeaderEscapeGuard::header_value("over\u{0008}type"),
            Err(EscapeError::ContainsNonPrintable(0x08))
        );
    }

    #[test]
    fn rejects_bell() {
        assert_eq!(
            HeaderEscapeGuard::header_value("ding\u{0007}!"),
            Err(EscapeError::ContainsNonPrintable(0x07))
        );
    }

    #[test]
    fn rejects_form_feed() {
        assert_eq!(
            HeaderEscapeGuard::header_value("page\u{000C}break"),
            Err(EscapeError::ContainsNonPrintable(0x0C))
        );
    }

    #[test]
    fn rejects_vertical_tab() {
        assert_eq!(
            HeaderEscapeGuard::header_value("vert\u{000B}tab"),
            Err(EscapeError::ContainsNonPrintable(0x0B))
        );
    }

    #[test]
    fn rejects_escape_byte() {
        assert_eq!(
            HeaderEscapeGuard::header_value("\u{001B}[31mred"),
            Err(EscapeError::ContainsNonPrintable(0x1B))
        );
    }

    #[test]
    fn rejects_del_byte() {
        assert_eq!(
            HeaderEscapeGuard::header_value("hello\u{007F}"),
            Err(EscapeError::ContainsNonPrintable(0x7F))
        );
    }

    #[test]
    fn rejects_oversize() {
        let s = "a".repeat(MAX_HEADER_VALUE_BYTES + 1);
        assert_eq!(
            HeaderEscapeGuard::header_value(&s),
            Err(EscapeError::OversizeForBoundary(MAX_HEADER_VALUE_BYTES + 1))
        );
    }

    #[test]
    fn oversize_check_runs_before_byte_scan() {
        // Even a value full of CRLFs reports as oversize when it
        // also exceeds the length cap. Cheap test that fixes the
        // observable order; if a future refactor flips the order
        // we want a deliberate signal.
        let s = "\r\n".repeat(MAX_HEADER_VALUE_BYTES / 2 + 1);
        let n = s.len();
        assert!(n > MAX_HEADER_VALUE_BYTES);
        assert_eq!(
            HeaderEscapeGuard::header_value(&s),
            Err(EscapeError::OversizeForBoundary(n))
        );
    }

    // --- Error display formatting ------------------------------------

    #[test]
    fn error_display_mentions_byte_class() {
        assert!(EscapeError::ContainsCrlf.to_string().contains("CR or LF"));
        assert!(EscapeError::ContainsNull.to_string().contains("NUL"));
        assert!(EscapeError::ContainsTab.to_string().contains("TAB"));
        assert!(EscapeError::ContainsNonPrintable(0x07)
            .to_string()
            .contains("0x07"));
        assert!(EscapeError::OversizeForBoundary(99_999)
            .to_string()
            .contains("99999"));
    }

    // --- Snapshot of escaped output for known fixtures ---------------
    //
    // Per issue #176 acceptance criteria. We don't pull in `insta` for
    // a single snapshot; the assertion is inline so it survives a
    // refactor without depending on a dev-only crate.

    #[test]
    fn snapshot_known_fixtures() {
        // (input, expected outcome). Order is documentation: each
        // line shows a known-shape attacker string and the verdict
        // the guard must return.
        let cases: &[(&str, Result<&[u8], EscapeError>)] = &[
            ("application/json", Ok(b"application/json")),
            (
                "max-age=31536000; includeSubDomains",
                Ok(b"max-age=31536000; includeSubDomains"),
            ),
            ("nosniff", Ok(b"nosniff")),
            ("DENY", Ok(b"DENY")),
            ("\"abc-123\"", Ok(b"\"abc-123\"")),
            ("evil\r\nLocation: /pwned", Err(EscapeError::ContainsCrlf)),
            ("set-cookie\nset-cookie", Err(EscapeError::ContainsCrlf)),
            (
                "bell\x07alarm",
                Err(EscapeError::ContainsNonPrintable(0x07)),
            ),
            ("trunc\0ate", Err(EscapeError::ContainsNull)),
            ("split\there", Err(EscapeError::ContainsTab)),
        ];
        for (input, expected) in cases {
            let got = HeaderEscapeGuard::header_value(input);
            match (expected, &got) {
                (Ok(bytes), Ok(v)) => {
                    assert_eq!(v.as_bytes(), *bytes, "input {input:?} produced wrong bytes")
                }
                (Err(want), Err(got_err)) => {
                    assert_eq!(want, got_err, "input {input:?} produced wrong error")
                }
                (Ok(_), Err(e)) => panic!("input {input:?} unexpectedly rejected: {e:?}"),
                (Err(want), Ok(v)) => panic!(
                    "input {input:?} unexpectedly accepted (bytes={:?}); wanted {want:?}",
                    v.as_bytes()
                ),
            }
        }
    }

    // --- Byte-level fuzz / proptest-style coverage --------------------
    //
    // The `proptest` crate is a dev-dep at the workspace root. The
    // assertion shape we want is small enough that we hand-roll a
    // deterministic byte-level sweep here rather than pull `proptest`
    // into this module, keeping the test fast and reproducible.

    #[test]
    fn fuzz_every_single_byte_position() {
        // Splicing any rejected byte into any position of an
        // otherwise-clean value — the very front and the very end
        // included — must trigger the typed error for that byte
        // class. DEL (0x7F) is the only rejected byte above 0x1F,
        // so it joins the 0x00..=0x1F sweep.
        const CLEAN: &[u8] = b"abcdefghij";
        for byte in (0u8..=0x1F).chain(std::iter::once(0x7F)) {
            let want = match byte {
                b'\r' | b'\n' => EscapeError::ContainsCrlf,
                0 => EscapeError::ContainsNull,
                b'\t' => EscapeError::ContainsTab,
                _ => EscapeError::ContainsNonPrintable(byte),
            };
            // `0..=len` so the trailing insertion point is covered.
            for pos in 0..=CLEAN.len() {
                let mut bytes = CLEAN.to_vec();
                bytes.insert(pos, byte);
                let s = String::from_utf8(bytes).unwrap();
                assert_eq!(
                    HeaderEscapeGuard::header_value(&s),
                    Err(want),
                    "byte 0x{byte:02X} at pos {pos}"
                );
            }
        }
    }

    #[test]
    fn fuzz_every_printable_ascii_accepted() {
        for byte in 0x20u8..0x7F {
            let s = format!("x{}y", byte as char);
            assert!(
                HeaderEscapeGuard::header_value(&s).is_ok(),
                "byte 0x{byte:02X} should be accepted",
            );
        }
    }

    #[test]
    fn fuzz_every_high_bit_byte_accepted() {
        // The guard takes `&str`, so high-bit bytes can only reach
        // it inside valid UTF-8 sequences. 0xC0, 0xC1 and
        // 0xF5..=0xFF never occur in valid UTF-8; every other
        // high-bit byte is exercised below and must round-trip
        // unchanged — the guard does not enforce ASCII-only output.
        //
        // - U+0080..=U+07FF yields every two-byte lead
        //   (0xC2..=0xDF) and every continuation byte (0x80..=0xBF).
        // - One code point per 0x1000 block yields each three-byte
        //   lead (0xE0..=0xEF).
        // - One code point per 0x40000 block yields each four-byte
        //   lead (0xF0..=0xF4).
        let two_byte = 0x80u32..=0x7FF;
        let three_byte =
            std::iter::once(0x0800u32).chain((0x1000u32..=0xF000).step_by(0x1000));
        let four_byte =
            std::iter::once(0x1_0000u32).chain((0x4_0000u32..=0x10_0000).step_by(0x4_0000));

        let mut seen = [false; 256];
        for codepoint in two_byte.chain(three_byte).chain(four_byte) {
            let s = char::from_u32(codepoint).unwrap().to_string();
            let v = HeaderEscapeGuard::header_value(&s).unwrap();
            // The bytes round-trip exactly as the input UTF-8.
            assert_eq!(v.as_bytes(), s.as_bytes());
            for &b in s.as_bytes() {
                seen[usize::from(b)] = true;
            }
        }

        // Guard against the sweep silently losing coverage.
        for byte in (0x80u8..=0xBF).chain(0xC2..=0xF4) {
            assert!(
                seen[usize::from(byte)],
                "byte 0x{byte:02X} was never exercised"
            );
        }
    }

    #[test]
    fn fuzz_oversize_boundary() {
        // The exact boundary is accepted; one byte past is rejected.
        let exact = "a".repeat(MAX_HEADER_VALUE_BYTES);
        assert!(HeaderEscapeGuard::header_value(&exact).is_ok());
        let over = "a".repeat(MAX_HEADER_VALUE_BYTES + 1);
        assert_eq!(
            HeaderEscapeGuard::header_value(&over),
            Err(EscapeError::OversizeForBoundary(MAX_HEADER_VALUE_BYTES + 1))
        );
    }

    #[test]
    fn fuzz_concatenation_attacks() {
        // The shape the Whiz / Babeld disclosure made famous:
        // suffix a control sequence after a benign-looking prefix.
        let trailers = [
            "\r\n",
            "\n",
            "\r",
            "\r\nX-Forged: 1",
            "\r\nLocation: http://attacker/",
            "\r\n\r\n<html>",
        ];
        for trailer in trailers {
            let payload = format!("application/json{trailer}");
            assert_eq!(
                HeaderEscapeGuard::header_value(&payload),
                Err(EscapeError::ContainsCrlf),
                "payload {payload:?} must reject"
            );
        }
    }
}